{ "best_metric": 0.08917281776666641, "best_model_checkpoint": "/content/drive/MyDrive/vit-cifar10/checkpoint-263043", "epoch": 100.0, "global_step": 265700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.9992472713586754e-05, "loss": 0.3607, "step": 100 }, { "epoch": 0.08, "learning_rate": 1.9984945427173507e-05, "loss": 0.3215, "step": 200 }, { "epoch": 0.11, "learning_rate": 1.997741814076026e-05, "loss": 0.3187, "step": 300 }, { "epoch": 0.15, "learning_rate": 1.9969890854347008e-05, "loss": 0.3123, "step": 400 }, { "epoch": 0.19, "learning_rate": 1.996236356793376e-05, "loss": 0.3007, "step": 500 }, { "epoch": 0.23, "learning_rate": 1.9954836281520513e-05, "loss": 0.3073, "step": 600 }, { "epoch": 0.26, "learning_rate": 1.9947308995107266e-05, "loss": 0.3031, "step": 700 }, { "epoch": 0.3, "learning_rate": 1.993978170869402e-05, "loss": 0.3008, "step": 800 }, { "epoch": 0.34, "learning_rate": 1.993225442228077e-05, "loss": 0.2996, "step": 900 }, { "epoch": 0.38, "learning_rate": 1.992472713586752e-05, "loss": 0.3026, "step": 1000 }, { "epoch": 0.41, "learning_rate": 1.9917199849454273e-05, "loss": 0.3003, "step": 1100 }, { "epoch": 0.45, "learning_rate": 1.9909672563041025e-05, "loss": 0.2994, "step": 1200 }, { "epoch": 0.49, "learning_rate": 1.9902145276627778e-05, "loss": 0.2974, "step": 1300 }, { "epoch": 0.53, "learning_rate": 1.989461799021453e-05, "loss": 0.2973, "step": 1400 }, { "epoch": 0.56, "learning_rate": 1.9887090703801283e-05, "loss": 0.2995, "step": 1500 }, { "epoch": 0.6, "learning_rate": 1.987956341738803e-05, "loss": 0.2945, "step": 1600 }, { "epoch": 0.64, "learning_rate": 1.9872036130974784e-05, "loss": 0.296, "step": 1700 }, { "epoch": 0.68, "learning_rate": 1.9864508844561537e-05, "loss": 0.2904, "step": 1800 }, { "epoch": 0.72, "learning_rate": 1.985698155814829e-05, "loss": 0.2931, "step": 1900 }, { "epoch": 0.75, "learning_rate": 1.9849454271735042e-05, "loss": 0.2952, "step": 2000 }, { "epoch": 0.79, "learning_rate": 1.9841926985321794e-05, "loss": 0.2935, "step": 2100 }, { "epoch": 0.83, "learning_rate": 1.9834399698908543e-05, "loss": 0.2912, "step": 2200 }, { "epoch": 0.87, "learning_rate": 1.9826872412495296e-05, "loss": 0.2951, "step": 2300 }, { "epoch": 0.9, "learning_rate": 1.981934512608205e-05, "loss": 0.2873, "step": 2400 }, { "epoch": 0.94, "learning_rate": 1.98118178396688e-05, "loss": 0.2906, "step": 2500 }, { "epoch": 0.98, "learning_rate": 1.9804290553255553e-05, "loss": 0.289, "step": 2600 }, { "epoch": 1.0, "eval_loss": 0.2940625846385956, "eval_runtime": 45.1121, "eval_samples_per_second": 166.253, "eval_steps_per_second": 10.396, "step": 2657 }, { "epoch": 1.02, "learning_rate": 1.9796763266842306e-05, "loss": 0.2831, "step": 2700 }, { "epoch": 1.05, "learning_rate": 1.978923598042906e-05, "loss": 0.2941, "step": 2800 }, { "epoch": 1.09, "learning_rate": 1.9781708694015808e-05, "loss": 0.2866, "step": 2900 }, { "epoch": 1.13, "learning_rate": 1.977418140760256e-05, "loss": 0.2811, "step": 3000 }, { "epoch": 1.17, "learning_rate": 1.9766654121189313e-05, "loss": 0.2912, "step": 3100 }, { "epoch": 1.2, "learning_rate": 1.9759126834776065e-05, "loss": 0.289, "step": 3200 }, { "epoch": 1.24, "learning_rate": 1.9751599548362818e-05, "loss": 0.2907, "step": 3300 }, { "epoch": 1.28, "learning_rate": 1.974407226194957e-05, "loss": 0.2913, "step": 3400 }, { "epoch": 1.32, "learning_rate": 1.973654497553632e-05, "loss": 0.2863, "step": 3500 }, { "epoch": 1.35, "learning_rate": 1.9729017689123072e-05, "loss": 0.2822, "step": 3600 }, { "epoch": 1.39, "learning_rate": 1.9721490402709824e-05, "loss": 0.2808, "step": 3700 }, { "epoch": 1.43, "learning_rate": 1.9713963116296577e-05, "loss": 0.2836, "step": 3800 }, { "epoch": 1.47, "learning_rate": 1.970643582988333e-05, "loss": 0.2796, "step": 3900 }, { "epoch": 1.51, "learning_rate": 1.9698908543470082e-05, "loss": 0.2831, "step": 4000 }, { "epoch": 1.54, "learning_rate": 1.969138125705683e-05, "loss": 0.2878, "step": 4100 }, { "epoch": 1.58, "learning_rate": 1.9683853970643583e-05, "loss": 0.2767, "step": 4200 }, { "epoch": 1.62, "learning_rate": 1.9676326684230336e-05, "loss": 0.2835, "step": 4300 }, { "epoch": 1.66, "learning_rate": 1.966879939781709e-05, "loss": 0.2823, "step": 4400 }, { "epoch": 1.69, "learning_rate": 1.966127211140384e-05, "loss": 0.284, "step": 4500 }, { "epoch": 1.73, "learning_rate": 1.9653744824990594e-05, "loss": 0.2813, "step": 4600 }, { "epoch": 1.77, "learning_rate": 1.9646217538577343e-05, "loss": 0.2806, "step": 4700 }, { "epoch": 1.81, "learning_rate": 1.9638690252164095e-05, "loss": 0.2846, "step": 4800 }, { "epoch": 1.84, "learning_rate": 1.9631162965750848e-05, "loss": 0.2834, "step": 4900 }, { "epoch": 1.88, "learning_rate": 1.96236356793376e-05, "loss": 0.2821, "step": 5000 }, { "epoch": 1.92, "learning_rate": 1.9616108392924353e-05, "loss": 0.2837, "step": 5100 }, { "epoch": 1.96, "learning_rate": 1.9608581106511105e-05, "loss": 0.2782, "step": 5200 }, { "epoch": 1.99, "learning_rate": 1.9601053820097858e-05, "loss": 0.2858, "step": 5300 }, { "epoch": 2.0, "eval_loss": 0.28088775277137756, "eval_runtime": 45.112, "eval_samples_per_second": 166.253, "eval_steps_per_second": 10.396, "step": 5314 }, { "epoch": 2.03, "learning_rate": 1.9593526533684607e-05, "loss": 0.2826, "step": 5400 }, { "epoch": 2.07, "learning_rate": 1.9585999247271363e-05, "loss": 0.2858, "step": 5500 }, { "epoch": 2.11, "learning_rate": 1.9578471960858112e-05, "loss": 0.2782, "step": 5600 }, { "epoch": 2.15, "learning_rate": 1.9570944674444864e-05, "loss": 0.2779, "step": 5700 }, { "epoch": 2.18, "learning_rate": 1.9563417388031617e-05, "loss": 0.2792, "step": 5800 }, { "epoch": 2.22, "learning_rate": 1.955589010161837e-05, "loss": 0.2837, "step": 5900 }, { "epoch": 2.26, "learning_rate": 1.954836281520512e-05, "loss": 0.2792, "step": 6000 }, { "epoch": 2.3, "learning_rate": 1.954083552879187e-05, "loss": 0.2825, "step": 6100 }, { "epoch": 2.33, "learning_rate": 1.9533308242378624e-05, "loss": 0.2762, "step": 6200 }, { "epoch": 2.37, "learning_rate": 1.9525780955965376e-05, "loss": 0.283, "step": 6300 }, { "epoch": 2.41, "learning_rate": 1.951825366955213e-05, "loss": 0.2824, "step": 6400 }, { "epoch": 2.45, "learning_rate": 1.951072638313888e-05, "loss": 0.2787, "step": 6500 }, { "epoch": 2.48, "learning_rate": 1.950319909672563e-05, "loss": 0.2769, "step": 6600 }, { "epoch": 2.52, "learning_rate": 1.9495671810312383e-05, "loss": 0.2791, "step": 6700 }, { "epoch": 2.56, "learning_rate": 1.9488144523899135e-05, "loss": 0.2754, "step": 6800 }, { "epoch": 2.6, "learning_rate": 1.9480617237485888e-05, "loss": 0.2721, "step": 6900 }, { "epoch": 2.63, "learning_rate": 1.947308995107264e-05, "loss": 0.2796, "step": 7000 }, { "epoch": 2.67, "learning_rate": 1.9465562664659393e-05, "loss": 0.2771, "step": 7100 }, { "epoch": 2.71, "learning_rate": 1.9458035378246142e-05, "loss": 0.2778, "step": 7200 }, { "epoch": 2.75, "learning_rate": 1.9450508091832894e-05, "loss": 0.2773, "step": 7300 }, { "epoch": 2.79, "learning_rate": 1.9442980805419647e-05, "loss": 0.2771, "step": 7400 }, { "epoch": 2.82, "learning_rate": 1.94354535190064e-05, "loss": 0.2764, "step": 7500 }, { "epoch": 2.86, "learning_rate": 1.9427926232593152e-05, "loss": 0.2789, "step": 7600 }, { "epoch": 2.9, "learning_rate": 1.9420398946179905e-05, "loss": 0.2768, "step": 7700 }, { "epoch": 2.94, "learning_rate": 1.9412871659766657e-05, "loss": 0.2765, "step": 7800 }, { "epoch": 2.97, "learning_rate": 1.9405344373353406e-05, "loss": 0.2693, "step": 7900 }, { "epoch": 3.0, "eval_loss": 0.2738477289676666, "eval_runtime": 44.824, "eval_samples_per_second": 167.321, "eval_steps_per_second": 10.463, "step": 7971 }, { "epoch": 3.01, "learning_rate": 1.9397817086940162e-05, "loss": 0.277, "step": 8000 }, { "epoch": 3.05, "learning_rate": 1.939028980052691e-05, "loss": 0.2734, "step": 8100 }, { "epoch": 3.09, "learning_rate": 1.9382762514113664e-05, "loss": 0.2767, "step": 8200 }, { "epoch": 3.12, "learning_rate": 1.9375235227700416e-05, "loss": 0.2767, "step": 8300 }, { "epoch": 3.16, "learning_rate": 1.936770794128717e-05, "loss": 0.271, "step": 8400 }, { "epoch": 3.2, "learning_rate": 1.9360180654873918e-05, "loss": 0.2747, "step": 8500 }, { "epoch": 3.24, "learning_rate": 1.9352653368460674e-05, "loss": 0.2758, "step": 8600 }, { "epoch": 3.27, "learning_rate": 1.9345126082047423e-05, "loss": 0.2706, "step": 8700 }, { "epoch": 3.31, "learning_rate": 1.9337598795634175e-05, "loss": 0.2734, "step": 8800 }, { "epoch": 3.35, "learning_rate": 1.9330071509220928e-05, "loss": 0.2732, "step": 8900 }, { "epoch": 3.39, "learning_rate": 1.932254422280768e-05, "loss": 0.2724, "step": 9000 }, { "epoch": 3.42, "learning_rate": 1.931501693639443e-05, "loss": 0.2678, "step": 9100 }, { "epoch": 3.46, "learning_rate": 1.9307489649981182e-05, "loss": 0.2755, "step": 9200 }, { "epoch": 3.5, "learning_rate": 1.9299962363567935e-05, "loss": 0.264, "step": 9300 }, { "epoch": 3.54, "learning_rate": 1.9292435077154687e-05, "loss": 0.2706, "step": 9400 }, { "epoch": 3.58, "learning_rate": 1.928490779074144e-05, "loss": 0.2686, "step": 9500 }, { "epoch": 3.61, "learning_rate": 1.9277380504328192e-05, "loss": 0.2681, "step": 9600 }, { "epoch": 3.65, "learning_rate": 1.926985321791494e-05, "loss": 0.2671, "step": 9700 }, { "epoch": 3.69, "learning_rate": 1.9262325931501694e-05, "loss": 0.2668, "step": 9800 }, { "epoch": 3.73, "learning_rate": 1.9254798645088446e-05, "loss": 0.2621, "step": 9900 }, { "epoch": 3.76, "learning_rate": 1.92472713586752e-05, "loss": 0.2641, "step": 10000 }, { "epoch": 3.8, "learning_rate": 1.923974407226195e-05, "loss": 0.2645, "step": 10100 }, { "epoch": 3.84, "learning_rate": 1.9232216785848704e-05, "loss": 0.2593, "step": 10200 }, { "epoch": 3.88, "learning_rate": 1.9224689499435456e-05, "loss": 0.2604, "step": 10300 }, { "epoch": 3.91, "learning_rate": 1.9217162213022205e-05, "loss": 0.2614, "step": 10400 }, { "epoch": 3.95, "learning_rate": 1.920963492660896e-05, "loss": 0.2627, "step": 10500 }, { "epoch": 3.99, "learning_rate": 1.920210764019571e-05, "loss": 0.2578, "step": 10600 }, { "epoch": 4.0, "eval_loss": 0.2545997202396393, "eval_runtime": 44.2178, "eval_samples_per_second": 169.615, "eval_steps_per_second": 10.607, "step": 10628 }, { "epoch": 4.03, "learning_rate": 1.9194580353782463e-05, "loss": 0.2567, "step": 10700 }, { "epoch": 4.06, "learning_rate": 1.9187053067369215e-05, "loss": 0.2547, "step": 10800 }, { "epoch": 4.1, "learning_rate": 1.9179525780955968e-05, "loss": 0.2564, "step": 10900 }, { "epoch": 4.14, "learning_rate": 1.9171998494542717e-05, "loss": 0.2603, "step": 11000 }, { "epoch": 4.18, "learning_rate": 1.9164471208129473e-05, "loss": 0.2514, "step": 11100 }, { "epoch": 4.22, "learning_rate": 1.9156943921716222e-05, "loss": 0.2571, "step": 11200 }, { "epoch": 4.25, "learning_rate": 1.9149416635302975e-05, "loss": 0.2513, "step": 11300 }, { "epoch": 4.29, "learning_rate": 1.9141889348889727e-05, "loss": 0.2531, "step": 11400 }, { "epoch": 4.33, "learning_rate": 1.913436206247648e-05, "loss": 0.2485, "step": 11500 }, { "epoch": 4.37, "learning_rate": 1.912683477606323e-05, "loss": 0.2519, "step": 11600 }, { "epoch": 4.4, "learning_rate": 1.9119307489649985e-05, "loss": 0.2506, "step": 11700 }, { "epoch": 4.44, "learning_rate": 1.9111780203236734e-05, "loss": 0.2487, "step": 11800 }, { "epoch": 4.48, "learning_rate": 1.9104252916823486e-05, "loss": 0.2456, "step": 11900 }, { "epoch": 4.52, "learning_rate": 1.909672563041024e-05, "loss": 0.2493, "step": 12000 }, { "epoch": 4.55, "learning_rate": 1.908919834399699e-05, "loss": 0.2439, "step": 12100 }, { "epoch": 4.59, "learning_rate": 1.908167105758374e-05, "loss": 0.2434, "step": 12200 }, { "epoch": 4.63, "learning_rate": 1.9074143771170493e-05, "loss": 0.2454, "step": 12300 }, { "epoch": 4.67, "learning_rate": 1.9066616484757246e-05, "loss": 0.2406, "step": 12400 }, { "epoch": 4.7, "learning_rate": 1.9059089198343998e-05, "loss": 0.2392, "step": 12500 }, { "epoch": 4.74, "learning_rate": 1.905156191193075e-05, "loss": 0.2394, "step": 12600 }, { "epoch": 4.78, "learning_rate": 1.9044034625517503e-05, "loss": 0.2351, "step": 12700 }, { "epoch": 4.82, "learning_rate": 1.9036507339104256e-05, "loss": 0.2367, "step": 12800 }, { "epoch": 4.86, "learning_rate": 1.9028980052691005e-05, "loss": 0.233, "step": 12900 }, { "epoch": 4.89, "learning_rate": 1.902145276627776e-05, "loss": 0.2276, "step": 13000 }, { "epoch": 4.93, "learning_rate": 1.901392547986451e-05, "loss": 0.2323, "step": 13100 }, { "epoch": 4.97, "learning_rate": 1.9006398193451262e-05, "loss": 0.2211, "step": 13200 }, { "epoch": 5.0, "eval_loss": 0.21532748639583588, "eval_runtime": 44.0312, "eval_samples_per_second": 170.334, "eval_steps_per_second": 10.652, "step": 13285 }, { "epoch": 5.01, "learning_rate": 1.8998870907038015e-05, "loss": 0.2225, "step": 13300 }, { "epoch": 5.04, "learning_rate": 1.8991343620624767e-05, "loss": 0.2211, "step": 13400 }, { "epoch": 5.08, "learning_rate": 1.8983816334211516e-05, "loss": 0.2178, "step": 13500 }, { "epoch": 5.12, "learning_rate": 1.8976289047798272e-05, "loss": 0.2164, "step": 13600 }, { "epoch": 5.16, "learning_rate": 1.896876176138502e-05, "loss": 0.2144, "step": 13700 }, { "epoch": 5.19, "learning_rate": 1.8961234474971774e-05, "loss": 0.2093, "step": 13800 }, { "epoch": 5.23, "learning_rate": 1.8953707188558526e-05, "loss": 0.2087, "step": 13900 }, { "epoch": 5.27, "learning_rate": 1.894617990214528e-05, "loss": 0.2111, "step": 14000 }, { "epoch": 5.31, "learning_rate": 1.8938652615732028e-05, "loss": 0.2041, "step": 14100 }, { "epoch": 5.34, "learning_rate": 1.8931125329318784e-05, "loss": 0.2023, "step": 14200 }, { "epoch": 5.38, "learning_rate": 1.8923598042905533e-05, "loss": 0.2011, "step": 14300 }, { "epoch": 5.42, "learning_rate": 1.8916070756492286e-05, "loss": 0.2028, "step": 14400 }, { "epoch": 5.46, "learning_rate": 1.8908543470079038e-05, "loss": 0.198, "step": 14500 }, { "epoch": 5.49, "learning_rate": 1.890101618366579e-05, "loss": 0.1986, "step": 14600 }, { "epoch": 5.53, "learning_rate": 1.889348889725254e-05, "loss": 0.1959, "step": 14700 }, { "epoch": 5.57, "learning_rate": 1.8885961610839296e-05, "loss": 0.1953, "step": 14800 }, { "epoch": 5.61, "learning_rate": 1.8878434324426048e-05, "loss": 0.193, "step": 14900 }, { "epoch": 5.65, "learning_rate": 1.8870907038012797e-05, "loss": 0.1918, "step": 15000 }, { "epoch": 5.68, "learning_rate": 1.886337975159955e-05, "loss": 0.1911, "step": 15100 }, { "epoch": 5.72, "learning_rate": 1.8855852465186302e-05, "loss": 0.1889, "step": 15200 }, { "epoch": 5.76, "learning_rate": 1.8848325178773055e-05, "loss": 0.1879, "step": 15300 }, { "epoch": 5.8, "learning_rate": 1.8840797892359804e-05, "loss": 0.1844, "step": 15400 }, { "epoch": 5.83, "learning_rate": 1.883327060594656e-05, "loss": 0.1821, "step": 15500 }, { "epoch": 5.87, "learning_rate": 1.882574331953331e-05, "loss": 0.1808, "step": 15600 }, { "epoch": 5.91, "learning_rate": 1.881821603312006e-05, "loss": 0.1863, "step": 15700 }, { "epoch": 5.95, "learning_rate": 1.8810688746706814e-05, "loss": 0.1806, "step": 15800 }, { "epoch": 5.98, "learning_rate": 1.8803161460293567e-05, "loss": 0.1799, "step": 15900 }, { "epoch": 6.0, "eval_loss": 0.1794862300157547, "eval_runtime": 44.3468, "eval_samples_per_second": 169.121, "eval_steps_per_second": 10.576, "step": 15942 }, { "epoch": 6.02, "learning_rate": 1.8795634173880316e-05, "loss": 0.1773, "step": 16000 }, { "epoch": 6.06, "learning_rate": 1.878810688746707e-05, "loss": 0.1746, "step": 16100 }, { "epoch": 6.1, "learning_rate": 1.878057960105382e-05, "loss": 0.1772, "step": 16200 }, { "epoch": 6.13, "learning_rate": 1.8773052314640573e-05, "loss": 0.1724, "step": 16300 }, { "epoch": 6.17, "learning_rate": 1.8765525028227326e-05, "loss": 0.17, "step": 16400 }, { "epoch": 6.21, "learning_rate": 1.8757997741814078e-05, "loss": 0.1737, "step": 16500 }, { "epoch": 6.25, "learning_rate": 1.8750470455400827e-05, "loss": 0.1732, "step": 16600 }, { "epoch": 6.29, "learning_rate": 1.8742943168987583e-05, "loss": 0.1689, "step": 16700 }, { "epoch": 6.32, "learning_rate": 1.8735415882574332e-05, "loss": 0.1704, "step": 16800 }, { "epoch": 6.36, "learning_rate": 1.8727888596161085e-05, "loss": 0.1664, "step": 16900 }, { "epoch": 6.4, "learning_rate": 1.8720361309747837e-05, "loss": 0.1651, "step": 17000 }, { "epoch": 6.44, "learning_rate": 1.871283402333459e-05, "loss": 0.1678, "step": 17100 }, { "epoch": 6.47, "learning_rate": 1.870530673692134e-05, "loss": 0.1674, "step": 17200 }, { "epoch": 6.51, "learning_rate": 1.8697779450508095e-05, "loss": 0.1674, "step": 17300 }, { "epoch": 6.55, "learning_rate": 1.8690252164094847e-05, "loss": 0.1643, "step": 17400 }, { "epoch": 6.59, "learning_rate": 1.8682724877681597e-05, "loss": 0.1667, "step": 17500 }, { "epoch": 6.62, "learning_rate": 1.867519759126835e-05, "loss": 0.1601, "step": 17600 }, { "epoch": 6.66, "learning_rate": 1.86676703048551e-05, "loss": 0.1645, "step": 17700 }, { "epoch": 6.7, "learning_rate": 1.8660143018441854e-05, "loss": 0.1637, "step": 17800 }, { "epoch": 6.74, "learning_rate": 1.8652615732028607e-05, "loss": 0.1633, "step": 17900 }, { "epoch": 6.77, "learning_rate": 1.864508844561536e-05, "loss": 0.1651, "step": 18000 }, { "epoch": 6.81, "learning_rate": 1.863756115920211e-05, "loss": 0.1589, "step": 18100 }, { "epoch": 6.85, "learning_rate": 1.863003387278886e-05, "loss": 0.1612, "step": 18200 }, { "epoch": 6.89, "learning_rate": 1.8622506586375613e-05, "loss": 0.1629, "step": 18300 }, { "epoch": 6.93, "learning_rate": 1.8614979299962366e-05, "loss": 0.1571, "step": 18400 }, { "epoch": 6.96, "learning_rate": 1.8607452013549115e-05, "loss": 0.158, "step": 18500 }, { "epoch": 7.0, "eval_loss": 0.162311390042305, "eval_runtime": 44.6199, "eval_samples_per_second": 168.087, "eval_steps_per_second": 10.511, "step": 18599 }, { "epoch": 7.0, "learning_rate": 1.859992472713587e-05, "loss": 0.158, "step": 18600 }, { "epoch": 7.04, "learning_rate": 1.859239744072262e-05, "loss": 0.1588, "step": 18700 }, { "epoch": 7.08, "learning_rate": 1.8584870154309373e-05, "loss": 0.1556, "step": 18800 }, { "epoch": 7.11, "learning_rate": 1.8577342867896125e-05, "loss": 0.1552, "step": 18900 }, { "epoch": 7.15, "learning_rate": 1.8569815581482878e-05, "loss": 0.1567, "step": 19000 }, { "epoch": 7.19, "learning_rate": 1.8562288295069627e-05, "loss": 0.1533, "step": 19100 }, { "epoch": 7.23, "learning_rate": 1.8554761008656383e-05, "loss": 0.1543, "step": 19200 }, { "epoch": 7.26, "learning_rate": 1.854723372224313e-05, "loss": 0.1545, "step": 19300 }, { "epoch": 7.3, "learning_rate": 1.8539706435829884e-05, "loss": 0.1557, "step": 19400 }, { "epoch": 7.34, "learning_rate": 1.8532179149416637e-05, "loss": 0.1524, "step": 19500 }, { "epoch": 7.38, "learning_rate": 1.852465186300339e-05, "loss": 0.1538, "step": 19600 }, { "epoch": 7.41, "learning_rate": 1.851712457659014e-05, "loss": 0.1533, "step": 19700 }, { "epoch": 7.45, "learning_rate": 1.8509597290176894e-05, "loss": 0.1506, "step": 19800 }, { "epoch": 7.49, "learning_rate": 1.8502070003763647e-05, "loss": 0.1499, "step": 19900 }, { "epoch": 7.53, "learning_rate": 1.8494542717350396e-05, "loss": 0.1514, "step": 20000 }, { "epoch": 7.56, "learning_rate": 1.848701543093715e-05, "loss": 0.1497, "step": 20100 }, { "epoch": 7.6, "learning_rate": 1.84794881445239e-05, "loss": 0.151, "step": 20200 }, { "epoch": 7.64, "learning_rate": 1.8471960858110653e-05, "loss": 0.1523, "step": 20300 }, { "epoch": 7.68, "learning_rate": 1.8464433571697406e-05, "loss": 0.1484, "step": 20400 }, { "epoch": 7.72, "learning_rate": 1.845690628528416e-05, "loss": 0.15, "step": 20500 }, { "epoch": 7.75, "learning_rate": 1.8449378998870908e-05, "loss": 0.1467, "step": 20600 }, { "epoch": 7.79, "learning_rate": 1.844185171245766e-05, "loss": 0.1467, "step": 20700 }, { "epoch": 7.83, "learning_rate": 1.8434324426044413e-05, "loss": 0.1469, "step": 20800 }, { "epoch": 7.87, "learning_rate": 1.8426797139631165e-05, "loss": 0.148, "step": 20900 }, { "epoch": 7.9, "learning_rate": 1.8419269853217914e-05, "loss": 0.1484, "step": 21000 }, { "epoch": 7.94, "learning_rate": 1.841174256680467e-05, "loss": 0.1459, "step": 21100 }, { "epoch": 7.98, "learning_rate": 1.840421528039142e-05, "loss": 0.1481, "step": 21200 }, { "epoch": 8.0, "eval_loss": 0.14529532194137573, "eval_runtime": 44.2737, "eval_samples_per_second": 169.401, "eval_steps_per_second": 10.593, "step": 21256 }, { "epoch": 8.02, "learning_rate": 1.8396687993978172e-05, "loss": 0.1446, "step": 21300 }, { "epoch": 8.05, "learning_rate": 1.8389160707564924e-05, "loss": 0.1465, "step": 21400 }, { "epoch": 8.09, "learning_rate": 1.8381633421151677e-05, "loss": 0.1458, "step": 21500 }, { "epoch": 8.13, "learning_rate": 1.8374106134738426e-05, "loss": 0.145, "step": 21600 }, { "epoch": 8.17, "learning_rate": 1.8366578848325182e-05, "loss": 0.1457, "step": 21700 }, { "epoch": 8.2, "learning_rate": 1.835905156191193e-05, "loss": 0.1422, "step": 21800 }, { "epoch": 8.24, "learning_rate": 1.8351524275498683e-05, "loss": 0.1435, "step": 21900 }, { "epoch": 8.28, "learning_rate": 1.8343996989085436e-05, "loss": 0.1456, "step": 22000 }, { "epoch": 8.32, "learning_rate": 1.833646970267219e-05, "loss": 0.1472, "step": 22100 }, { "epoch": 8.36, "learning_rate": 1.8328942416258938e-05, "loss": 0.1434, "step": 22200 }, { "epoch": 8.39, "learning_rate": 1.8321415129845694e-05, "loss": 0.1446, "step": 22300 }, { "epoch": 8.43, "learning_rate": 1.8313887843432446e-05, "loss": 0.1464, "step": 22400 }, { "epoch": 8.47, "learning_rate": 1.8306360557019195e-05, "loss": 0.1446, "step": 22500 }, { "epoch": 8.51, "learning_rate": 1.8298833270605948e-05, "loss": 0.1428, "step": 22600 }, { "epoch": 8.54, "learning_rate": 1.82913059841927e-05, "loss": 0.1437, "step": 22700 }, { "epoch": 8.58, "learning_rate": 1.8283778697779453e-05, "loss": 0.1412, "step": 22800 }, { "epoch": 8.62, "learning_rate": 1.8276251411366205e-05, "loss": 0.1396, "step": 22900 }, { "epoch": 8.66, "learning_rate": 1.8268724124952958e-05, "loss": 0.1426, "step": 23000 }, { "epoch": 8.69, "learning_rate": 1.8261196838539707e-05, "loss": 0.1421, "step": 23100 }, { "epoch": 8.73, "learning_rate": 1.825366955212646e-05, "loss": 0.1418, "step": 23200 }, { "epoch": 8.77, "learning_rate": 1.8246142265713212e-05, "loss": 0.1403, "step": 23300 }, { "epoch": 8.81, "learning_rate": 1.8238614979299964e-05, "loss": 0.1416, "step": 23400 }, { "epoch": 8.84, "learning_rate": 1.8231087692886717e-05, "loss": 0.1403, "step": 23500 }, { "epoch": 8.88, "learning_rate": 1.822356040647347e-05, "loss": 0.1396, "step": 23600 }, { "epoch": 8.92, "learning_rate": 1.821603312006022e-05, "loss": 0.1378, "step": 23700 }, { "epoch": 8.96, "learning_rate": 1.820850583364697e-05, "loss": 0.1392, "step": 23800 }, { "epoch": 9.0, "learning_rate": 1.8200978547233724e-05, "loss": 0.1391, "step": 23900 }, { "epoch": 9.0, "eval_loss": 0.13683784008026123, "eval_runtime": 44.0792, "eval_samples_per_second": 170.148, "eval_steps_per_second": 10.64, "step": 23913 }, { "epoch": 9.03, "learning_rate": 1.8193451260820476e-05, "loss": 0.1407, "step": 24000 }, { "epoch": 9.07, "learning_rate": 1.8185923974407225e-05, "loss": 0.1386, "step": 24100 }, { "epoch": 9.11, "learning_rate": 1.817839668799398e-05, "loss": 0.1385, "step": 24200 }, { "epoch": 9.15, "learning_rate": 1.817086940158073e-05, "loss": 0.1403, "step": 24300 }, { "epoch": 9.18, "learning_rate": 1.8163342115167483e-05, "loss": 0.1395, "step": 24400 }, { "epoch": 9.22, "learning_rate": 1.8155814828754235e-05, "loss": 0.1374, "step": 24500 }, { "epoch": 9.26, "learning_rate": 1.8148287542340988e-05, "loss": 0.1354, "step": 24600 }, { "epoch": 9.3, "learning_rate": 1.8140760255927737e-05, "loss": 0.1367, "step": 24700 }, { "epoch": 9.33, "learning_rate": 1.8133232969514493e-05, "loss": 0.1389, "step": 24800 }, { "epoch": 9.37, "learning_rate": 1.8125705683101245e-05, "loss": 0.1355, "step": 24900 }, { "epoch": 9.41, "learning_rate": 1.8118178396687994e-05, "loss": 0.1359, "step": 25000 }, { "epoch": 9.45, "learning_rate": 1.8110651110274747e-05, "loss": 0.1351, "step": 25100 }, { "epoch": 9.48, "learning_rate": 1.81031238238615e-05, "loss": 0.1381, "step": 25200 }, { "epoch": 9.52, "learning_rate": 1.8095596537448252e-05, "loss": 0.1364, "step": 25300 }, { "epoch": 9.56, "learning_rate": 1.8088069251035005e-05, "loss": 0.1339, "step": 25400 }, { "epoch": 9.6, "learning_rate": 1.8080541964621757e-05, "loss": 0.1346, "step": 25500 }, { "epoch": 9.63, "learning_rate": 1.8073014678208506e-05, "loss": 0.1338, "step": 25600 }, { "epoch": 9.67, "learning_rate": 1.806548739179526e-05, "loss": 0.1332, "step": 25700 }, { "epoch": 9.71, "learning_rate": 1.805796010538201e-05, "loss": 0.1344, "step": 25800 }, { "epoch": 9.75, "learning_rate": 1.8050432818968764e-05, "loss": 0.134, "step": 25900 }, { "epoch": 9.79, "learning_rate": 1.8042905532555516e-05, "loss": 0.1365, "step": 26000 }, { "epoch": 9.82, "learning_rate": 1.803537824614227e-05, "loss": 0.1354, "step": 26100 }, { "epoch": 9.86, "learning_rate": 1.8027850959729018e-05, "loss": 0.1327, "step": 26200 }, { "epoch": 9.9, "learning_rate": 1.802032367331577e-05, "loss": 0.1366, "step": 26300 }, { "epoch": 9.94, "learning_rate": 1.8012796386902523e-05, "loss": 0.1343, "step": 26400 }, { "epoch": 9.97, "learning_rate": 1.8005269100489275e-05, "loss": 0.1348, "step": 26500 }, { "epoch": 10.0, "eval_loss": 0.13540224730968475, "eval_runtime": 43.8422, "eval_samples_per_second": 171.068, "eval_steps_per_second": 10.697, "step": 26570 }, { "epoch": 10.01, "learning_rate": 1.7997741814076028e-05, "loss": 0.134, "step": 26600 }, { "epoch": 10.05, "learning_rate": 1.799021452766278e-05, "loss": 0.1327, "step": 26700 }, { "epoch": 10.09, "learning_rate": 1.798268724124953e-05, "loss": 0.1303, "step": 26800 }, { "epoch": 10.12, "learning_rate": 1.7975159954836282e-05, "loss": 0.1343, "step": 26900 }, { "epoch": 10.16, "learning_rate": 1.7967632668423035e-05, "loss": 0.1323, "step": 27000 }, { "epoch": 10.2, "learning_rate": 1.7960105382009787e-05, "loss": 0.1318, "step": 27100 }, { "epoch": 10.24, "learning_rate": 1.7952578095596536e-05, "loss": 0.1322, "step": 27200 }, { "epoch": 10.27, "learning_rate": 1.7945050809183292e-05, "loss": 0.1318, "step": 27300 }, { "epoch": 10.31, "learning_rate": 1.7937523522770045e-05, "loss": 0.1324, "step": 27400 }, { "epoch": 10.35, "learning_rate": 1.7929996236356794e-05, "loss": 0.1306, "step": 27500 }, { "epoch": 10.39, "learning_rate": 1.7922468949943546e-05, "loss": 0.1283, "step": 27600 }, { "epoch": 10.43, "learning_rate": 1.79149416635303e-05, "loss": 0.1311, "step": 27700 }, { "epoch": 10.46, "learning_rate": 1.790741437711705e-05, "loss": 0.1313, "step": 27800 }, { "epoch": 10.5, "learning_rate": 1.7899887090703804e-05, "loss": 0.1291, "step": 27900 }, { "epoch": 10.54, "learning_rate": 1.7892359804290556e-05, "loss": 0.1291, "step": 28000 }, { "epoch": 10.58, "learning_rate": 1.7884832517877305e-05, "loss": 0.1314, "step": 28100 }, { "epoch": 10.61, "learning_rate": 1.7877305231464058e-05, "loss": 0.1294, "step": 28200 }, { "epoch": 10.65, "learning_rate": 1.786977794505081e-05, "loss": 0.1308, "step": 28300 }, { "epoch": 10.69, "learning_rate": 1.7862250658637563e-05, "loss": 0.1317, "step": 28400 }, { "epoch": 10.73, "learning_rate": 1.7854723372224315e-05, "loss": 0.129, "step": 28500 }, { "epoch": 10.76, "learning_rate": 1.7847196085811068e-05, "loss": 0.1307, "step": 28600 }, { "epoch": 10.8, "learning_rate": 1.7839668799397817e-05, "loss": 0.1302, "step": 28700 }, { "epoch": 10.84, "learning_rate": 1.783214151298457e-05, "loss": 0.1283, "step": 28800 }, { "epoch": 10.88, "learning_rate": 1.7824614226571322e-05, "loss": 0.1277, "step": 28900 }, { "epoch": 10.91, "learning_rate": 1.7817086940158075e-05, "loss": 0.1287, "step": 29000 }, { "epoch": 10.95, "learning_rate": 1.7809559653744827e-05, "loss": 0.13, "step": 29100 }, { "epoch": 10.99, "learning_rate": 1.780203236733158e-05, "loss": 0.129, "step": 29200 }, { "epoch": 11.0, "eval_loss": 0.12486864626407623, "eval_runtime": 44.5752, "eval_samples_per_second": 168.255, "eval_steps_per_second": 10.522, "step": 29227 }, { "epoch": 11.03, "learning_rate": 1.779450508091833e-05, "loss": 0.1259, "step": 29300 }, { "epoch": 11.07, "learning_rate": 1.778697779450508e-05, "loss": 0.128, "step": 29400 }, { "epoch": 11.1, "learning_rate": 1.7779450508091834e-05, "loss": 0.127, "step": 29500 }, { "epoch": 11.14, "learning_rate": 1.7771923221678586e-05, "loss": 0.1277, "step": 29600 }, { "epoch": 11.18, "learning_rate": 1.776439593526534e-05, "loss": 0.1271, "step": 29700 }, { "epoch": 11.22, "learning_rate": 1.775686864885209e-05, "loss": 0.1263, "step": 29800 }, { "epoch": 11.25, "learning_rate": 1.7749341362438844e-05, "loss": 0.1259, "step": 29900 }, { "epoch": 11.29, "learning_rate": 1.7741814076025593e-05, "loss": 0.1257, "step": 30000 }, { "epoch": 11.33, "learning_rate": 1.7734286789612346e-05, "loss": 0.125, "step": 30100 }, { "epoch": 11.37, "learning_rate": 1.7726759503199098e-05, "loss": 0.128, "step": 30200 }, { "epoch": 11.4, "learning_rate": 1.771923221678585e-05, "loss": 0.1271, "step": 30300 }, { "epoch": 11.44, "learning_rate": 1.7711704930372603e-05, "loss": 0.1268, "step": 30400 }, { "epoch": 11.48, "learning_rate": 1.7704177643959356e-05, "loss": 0.1262, "step": 30500 }, { "epoch": 11.52, "learning_rate": 1.7696650357546105e-05, "loss": 0.1247, "step": 30600 }, { "epoch": 11.55, "learning_rate": 1.7689123071132857e-05, "loss": 0.1243, "step": 30700 }, { "epoch": 11.59, "learning_rate": 1.768159578471961e-05, "loss": 0.1249, "step": 30800 }, { "epoch": 11.63, "learning_rate": 1.7674068498306362e-05, "loss": 0.1255, "step": 30900 }, { "epoch": 11.67, "learning_rate": 1.7666541211893115e-05, "loss": 0.1258, "step": 31000 }, { "epoch": 11.7, "learning_rate": 1.7659013925479867e-05, "loss": 0.1234, "step": 31100 }, { "epoch": 11.74, "learning_rate": 1.7651486639066616e-05, "loss": 0.1226, "step": 31200 }, { "epoch": 11.78, "learning_rate": 1.764395935265337e-05, "loss": 0.1251, "step": 31300 }, { "epoch": 11.82, "learning_rate": 1.763643206624012e-05, "loss": 0.1238, "step": 31400 }, { "epoch": 11.86, "learning_rate": 1.7628904779826874e-05, "loss": 0.1262, "step": 31500 }, { "epoch": 11.89, "learning_rate": 1.7621377493413626e-05, "loss": 0.1249, "step": 31600 }, { "epoch": 11.93, "learning_rate": 1.761385020700038e-05, "loss": 0.1224, "step": 31700 }, { "epoch": 11.97, "learning_rate": 1.7606322920587128e-05, "loss": 0.126, "step": 31800 }, { "epoch": 12.0, "eval_loss": 0.12289831042289734, "eval_runtime": 44.279, "eval_samples_per_second": 169.381, "eval_steps_per_second": 10.592, "step": 31884 }, { "epoch": 12.01, "learning_rate": 1.759879563417388e-05, "loss": 0.1247, "step": 31900 }, { "epoch": 12.04, "learning_rate": 1.7591268347760633e-05, "loss": 0.1224, "step": 32000 }, { "epoch": 12.08, "learning_rate": 1.7583741061347386e-05, "loss": 0.1225, "step": 32100 }, { "epoch": 12.12, "learning_rate": 1.7576213774934138e-05, "loss": 0.1247, "step": 32200 }, { "epoch": 12.16, "learning_rate": 1.756868648852089e-05, "loss": 0.1244, "step": 32300 }, { "epoch": 12.19, "learning_rate": 1.7561159202107643e-05, "loss": 0.1231, "step": 32400 }, { "epoch": 12.23, "learning_rate": 1.7553631915694392e-05, "loss": 0.1215, "step": 32500 }, { "epoch": 12.27, "learning_rate": 1.7546104629281148e-05, "loss": 0.1238, "step": 32600 }, { "epoch": 12.31, "learning_rate": 1.7538577342867897e-05, "loss": 0.1251, "step": 32700 }, { "epoch": 12.34, "learning_rate": 1.753105005645465e-05, "loss": 0.1248, "step": 32800 }, { "epoch": 12.38, "learning_rate": 1.7523522770041402e-05, "loss": 0.1227, "step": 32900 }, { "epoch": 12.42, "learning_rate": 1.7515995483628155e-05, "loss": 0.1219, "step": 33000 }, { "epoch": 12.46, "learning_rate": 1.7508468197214904e-05, "loss": 0.121, "step": 33100 }, { "epoch": 12.5, "learning_rate": 1.7500940910801657e-05, "loss": 0.1225, "step": 33200 }, { "epoch": 12.53, "learning_rate": 1.749341362438841e-05, "loss": 0.1224, "step": 33300 }, { "epoch": 12.57, "learning_rate": 1.748588633797516e-05, "loss": 0.1214, "step": 33400 }, { "epoch": 12.61, "learning_rate": 1.7478359051561914e-05, "loss": 0.1217, "step": 33500 }, { "epoch": 12.65, "learning_rate": 1.7470831765148667e-05, "loss": 0.1201, "step": 33600 }, { "epoch": 12.68, "learning_rate": 1.7463304478735416e-05, "loss": 0.1212, "step": 33700 }, { "epoch": 12.72, "learning_rate": 1.7455777192322168e-05, "loss": 0.1218, "step": 33800 }, { "epoch": 12.76, "learning_rate": 1.744824990590892e-05, "loss": 0.1221, "step": 33900 }, { "epoch": 12.8, "learning_rate": 1.7440722619495673e-05, "loss": 0.1199, "step": 34000 }, { "epoch": 12.83, "learning_rate": 1.7433195333082426e-05, "loss": 0.122, "step": 34100 }, { "epoch": 12.87, "learning_rate": 1.7425668046669178e-05, "loss": 0.12, "step": 34200 }, { "epoch": 12.91, "learning_rate": 1.7418140760255927e-05, "loss": 0.1222, "step": 34300 }, { "epoch": 12.95, "learning_rate": 1.741061347384268e-05, "loss": 0.1228, "step": 34400 }, { "epoch": 12.98, "learning_rate": 1.7403086187429432e-05, "loss": 0.1216, "step": 34500 }, { "epoch": 13.0, "eval_loss": 0.11841125041246414, "eval_runtime": 44.6411, "eval_samples_per_second": 168.006, "eval_steps_per_second": 10.506, "step": 34541 }, { "epoch": 13.02, "learning_rate": 1.7395558901016185e-05, "loss": 0.1195, "step": 34600 }, { "epoch": 13.06, "learning_rate": 1.7388031614602937e-05, "loss": 0.1211, "step": 34700 }, { "epoch": 13.1, "learning_rate": 1.738050432818969e-05, "loss": 0.1209, "step": 34800 }, { "epoch": 13.14, "learning_rate": 1.7372977041776442e-05, "loss": 0.122, "step": 34900 }, { "epoch": 13.17, "learning_rate": 1.736544975536319e-05, "loss": 0.1206, "step": 35000 }, { "epoch": 13.21, "learning_rate": 1.7357922468949947e-05, "loss": 0.1198, "step": 35100 }, { "epoch": 13.25, "learning_rate": 1.7350395182536697e-05, "loss": 0.1196, "step": 35200 }, { "epoch": 13.29, "learning_rate": 1.734286789612345e-05, "loss": 0.1192, "step": 35300 }, { "epoch": 13.32, "learning_rate": 1.73353406097102e-05, "loss": 0.12, "step": 35400 }, { "epoch": 13.36, "learning_rate": 1.7327813323296954e-05, "loss": 0.1179, "step": 35500 }, { "epoch": 13.4, "learning_rate": 1.7320286036883703e-05, "loss": 0.1201, "step": 35600 }, { "epoch": 13.44, "learning_rate": 1.731275875047046e-05, "loss": 0.1181, "step": 35700 }, { "epoch": 13.47, "learning_rate": 1.730523146405721e-05, "loss": 0.1185, "step": 35800 }, { "epoch": 13.51, "learning_rate": 1.729770417764396e-05, "loss": 0.1171, "step": 35900 }, { "epoch": 13.55, "learning_rate": 1.7290176891230713e-05, "loss": 0.1202, "step": 36000 }, { "epoch": 13.59, "learning_rate": 1.7282649604817466e-05, "loss": 0.1191, "step": 36100 }, { "epoch": 13.62, "learning_rate": 1.7275122318404215e-05, "loss": 0.1201, "step": 36200 }, { "epoch": 13.66, "learning_rate": 1.7267595031990967e-05, "loss": 0.1188, "step": 36300 }, { "epoch": 13.7, "learning_rate": 1.726006774557772e-05, "loss": 0.1191, "step": 36400 }, { "epoch": 13.74, "learning_rate": 1.7252540459164473e-05, "loss": 0.117, "step": 36500 }, { "epoch": 13.77, "learning_rate": 1.7245013172751225e-05, "loss": 0.1177, "step": 36600 }, { "epoch": 13.81, "learning_rate": 1.7237485886337978e-05, "loss": 0.1173, "step": 36700 }, { "epoch": 13.85, "learning_rate": 1.7229958599924727e-05, "loss": 0.1176, "step": 36800 }, { "epoch": 13.89, "learning_rate": 1.722243131351148e-05, "loss": 0.1177, "step": 36900 }, { "epoch": 13.93, "learning_rate": 1.7214904027098232e-05, "loss": 0.1168, "step": 37000 }, { "epoch": 13.96, "learning_rate": 1.7207376740684984e-05, "loss": 0.1175, "step": 37100 }, { "epoch": 14.0, "eval_loss": 0.1184767335653305, "eval_runtime": 44.9958, "eval_samples_per_second": 166.682, "eval_steps_per_second": 10.423, "step": 37198 }, { "epoch": 14.0, "learning_rate": 1.7199849454271737e-05, "loss": 0.1172, "step": 37200 }, { "epoch": 14.04, "learning_rate": 1.719232216785849e-05, "loss": 0.117, "step": 37300 }, { "epoch": 14.08, "learning_rate": 1.7184794881445242e-05, "loss": 0.1175, "step": 37400 }, { "epoch": 14.11, "learning_rate": 1.717726759503199e-05, "loss": 0.1164, "step": 37500 }, { "epoch": 14.15, "learning_rate": 1.7169740308618747e-05, "loss": 0.1161, "step": 37600 }, { "epoch": 14.19, "learning_rate": 1.7162213022205496e-05, "loss": 0.1183, "step": 37700 }, { "epoch": 14.23, "learning_rate": 1.715468573579225e-05, "loss": 0.1161, "step": 37800 }, { "epoch": 14.26, "learning_rate": 1.7147158449379e-05, "loss": 0.117, "step": 37900 }, { "epoch": 14.3, "learning_rate": 1.7139631162965753e-05, "loss": 0.1169, "step": 38000 }, { "epoch": 14.34, "learning_rate": 1.7132103876552503e-05, "loss": 0.1171, "step": 38100 }, { "epoch": 14.38, "learning_rate": 1.712457659013926e-05, "loss": 0.1165, "step": 38200 }, { "epoch": 14.41, "learning_rate": 1.7117049303726008e-05, "loss": 0.115, "step": 38300 }, { "epoch": 14.45, "learning_rate": 1.710952201731276e-05, "loss": 0.1164, "step": 38400 }, { "epoch": 14.49, "learning_rate": 1.7101994730899513e-05, "loss": 0.1151, "step": 38500 }, { "epoch": 14.53, "learning_rate": 1.7094467444486265e-05, "loss": 0.1163, "step": 38600 }, { "epoch": 14.57, "learning_rate": 1.7086940158073014e-05, "loss": 0.1157, "step": 38700 }, { "epoch": 14.6, "learning_rate": 1.7079412871659767e-05, "loss": 0.1154, "step": 38800 }, { "epoch": 14.64, "learning_rate": 1.707188558524652e-05, "loss": 0.1145, "step": 38900 }, { "epoch": 14.68, "learning_rate": 1.7064358298833272e-05, "loss": 0.1153, "step": 39000 }, { "epoch": 14.72, "learning_rate": 1.7056831012420024e-05, "loss": 0.1163, "step": 39100 }, { "epoch": 14.75, "learning_rate": 1.7049303726006777e-05, "loss": 0.1173, "step": 39200 }, { "epoch": 14.79, "learning_rate": 1.7041776439593526e-05, "loss": 0.1161, "step": 39300 }, { "epoch": 14.83, "learning_rate": 1.703424915318028e-05, "loss": 0.1144, "step": 39400 }, { "epoch": 14.87, "learning_rate": 1.702672186676703e-05, "loss": 0.1152, "step": 39500 }, { "epoch": 14.9, "learning_rate": 1.7019194580353783e-05, "loss": 0.1129, "step": 39600 }, { "epoch": 14.94, "learning_rate": 1.7011667293940536e-05, "loss": 0.1159, "step": 39700 }, { "epoch": 14.98, "learning_rate": 1.700414000752729e-05, "loss": 0.1137, "step": 39800 }, { "epoch": 15.0, "eval_loss": 0.11463519930839539, "eval_runtime": 44.7206, "eval_samples_per_second": 167.708, "eval_steps_per_second": 10.487, "step": 39855 }, { "epoch": 15.02, "learning_rate": 1.699661272111404e-05, "loss": 0.1136, "step": 39900 }, { "epoch": 15.05, "learning_rate": 1.698908543470079e-05, "loss": 0.1154, "step": 40000 }, { "epoch": 15.09, "learning_rate": 1.6981558148287546e-05, "loss": 0.1119, "step": 40100 }, { "epoch": 15.13, "learning_rate": 1.6974030861874295e-05, "loss": 0.1147, "step": 40200 }, { "epoch": 15.17, "learning_rate": 1.6966503575461048e-05, "loss": 0.1133, "step": 40300 }, { "epoch": 15.21, "learning_rate": 1.69589762890478e-05, "loss": 0.1159, "step": 40400 }, { "epoch": 15.24, "learning_rate": 1.6951449002634553e-05, "loss": 0.1123, "step": 40500 }, { "epoch": 15.28, "learning_rate": 1.6943921716221302e-05, "loss": 0.1144, "step": 40600 }, { "epoch": 15.32, "learning_rate": 1.6936394429808058e-05, "loss": 0.1156, "step": 40700 }, { "epoch": 15.36, "learning_rate": 1.6928867143394807e-05, "loss": 0.115, "step": 40800 }, { "epoch": 15.39, "learning_rate": 1.692133985698156e-05, "loss": 0.1129, "step": 40900 }, { "epoch": 15.43, "learning_rate": 1.6913812570568312e-05, "loss": 0.1136, "step": 41000 }, { "epoch": 15.47, "learning_rate": 1.6906285284155064e-05, "loss": 0.1127, "step": 41100 }, { "epoch": 15.51, "learning_rate": 1.6898757997741814e-05, "loss": 0.1119, "step": 41200 }, { "epoch": 15.54, "learning_rate": 1.689123071132857e-05, "loss": 0.1131, "step": 41300 }, { "epoch": 15.58, "learning_rate": 1.688370342491532e-05, "loss": 0.1112, "step": 41400 }, { "epoch": 15.62, "learning_rate": 1.687617613850207e-05, "loss": 0.1149, "step": 41500 }, { "epoch": 15.66, "learning_rate": 1.6868648852088824e-05, "loss": 0.1133, "step": 41600 }, { "epoch": 15.69, "learning_rate": 1.6861121565675576e-05, "loss": 0.1123, "step": 41700 }, { "epoch": 15.73, "learning_rate": 1.6853594279262325e-05, "loss": 0.1138, "step": 41800 }, { "epoch": 15.77, "learning_rate": 1.6846066992849078e-05, "loss": 0.1134, "step": 41900 }, { "epoch": 15.81, "learning_rate": 1.683853970643583e-05, "loss": 0.1138, "step": 42000 }, { "epoch": 15.84, "learning_rate": 1.6831012420022583e-05, "loss": 0.1122, "step": 42100 }, { "epoch": 15.88, "learning_rate": 1.6823485133609335e-05, "loss": 0.1127, "step": 42200 }, { "epoch": 15.92, "learning_rate": 1.6815957847196088e-05, "loss": 0.1147, "step": 42300 }, { "epoch": 15.96, "learning_rate": 1.680843056078284e-05, "loss": 0.1127, "step": 42400 }, { "epoch": 16.0, "learning_rate": 1.680090327436959e-05, "loss": 0.1125, "step": 42500 }, { "epoch": 16.0, "eval_loss": 0.1117386743426323, "eval_runtime": 44.2589, "eval_samples_per_second": 169.457, "eval_steps_per_second": 10.597, "step": 42512 }, { "epoch": 16.03, "learning_rate": 1.6793375987956345e-05, "loss": 0.1115, "step": 42600 }, { "epoch": 16.07, "learning_rate": 1.6785848701543094e-05, "loss": 0.1128, "step": 42700 }, { "epoch": 16.11, "learning_rate": 1.6778321415129847e-05, "loss": 0.1115, "step": 42800 }, { "epoch": 16.15, "learning_rate": 1.67707941287166e-05, "loss": 0.1119, "step": 42900 }, { "epoch": 16.18, "learning_rate": 1.6763266842303352e-05, "loss": 0.1126, "step": 43000 }, { "epoch": 16.22, "learning_rate": 1.67557395558901e-05, "loss": 0.1127, "step": 43100 }, { "epoch": 16.26, "learning_rate": 1.6748212269476857e-05, "loss": 0.1145, "step": 43200 }, { "epoch": 16.3, "learning_rate": 1.6740684983063606e-05, "loss": 0.1118, "step": 43300 }, { "epoch": 16.33, "learning_rate": 1.673315769665036e-05, "loss": 0.1129, "step": 43400 }, { "epoch": 16.37, "learning_rate": 1.672563041023711e-05, "loss": 0.113, "step": 43500 }, { "epoch": 16.41, "learning_rate": 1.6718103123823864e-05, "loss": 0.1129, "step": 43600 }, { "epoch": 16.45, "learning_rate": 1.6710575837410613e-05, "loss": 0.1098, "step": 43700 }, { "epoch": 16.48, "learning_rate": 1.670304855099737e-05, "loss": 0.112, "step": 43800 }, { "epoch": 16.52, "learning_rate": 1.6695521264584118e-05, "loss": 0.1117, "step": 43900 }, { "epoch": 16.56, "learning_rate": 1.668799397817087e-05, "loss": 0.1128, "step": 44000 }, { "epoch": 16.6, "learning_rate": 1.6680466691757623e-05, "loss": 0.1133, "step": 44100 }, { "epoch": 16.64, "learning_rate": 1.6672939405344375e-05, "loss": 0.11, "step": 44200 }, { "epoch": 16.67, "learning_rate": 1.6665412118931125e-05, "loss": 0.1138, "step": 44300 }, { "epoch": 16.71, "learning_rate": 1.665788483251788e-05, "loss": 0.1108, "step": 44400 }, { "epoch": 16.75, "learning_rate": 1.665035754610463e-05, "loss": 0.1096, "step": 44500 }, { "epoch": 16.79, "learning_rate": 1.6642830259691382e-05, "loss": 0.1092, "step": 44600 }, { "epoch": 16.82, "learning_rate": 1.6635302973278135e-05, "loss": 0.1106, "step": 44700 }, { "epoch": 16.86, "learning_rate": 1.6627775686864887e-05, "loss": 0.1118, "step": 44800 }, { "epoch": 16.9, "learning_rate": 1.662024840045164e-05, "loss": 0.1116, "step": 44900 }, { "epoch": 16.94, "learning_rate": 1.661272111403839e-05, "loss": 0.1095, "step": 45000 }, { "epoch": 16.97, "learning_rate": 1.6605193827625145e-05, "loss": 0.1112, "step": 45100 }, { "epoch": 17.0, "eval_loss": 0.10999125987291336, "eval_runtime": 44.3431, "eval_samples_per_second": 169.136, "eval_steps_per_second": 10.577, "step": 45169 }, { "epoch": 17.01, "learning_rate": 1.6597666541211894e-05, "loss": 0.11, "step": 45200 }, { "epoch": 17.05, "learning_rate": 1.6590139254798646e-05, "loss": 0.1107, "step": 45300 }, { "epoch": 17.09, "learning_rate": 1.65826119683854e-05, "loss": 0.1127, "step": 45400 }, { "epoch": 17.12, "learning_rate": 1.657508468197215e-05, "loss": 0.1106, "step": 45500 }, { "epoch": 17.16, "learning_rate": 1.65675573955589e-05, "loss": 0.1098, "step": 45600 }, { "epoch": 17.2, "learning_rate": 1.6560030109145656e-05, "loss": 0.111, "step": 45700 }, { "epoch": 17.24, "learning_rate": 1.6552502822732405e-05, "loss": 0.1094, "step": 45800 }, { "epoch": 17.28, "learning_rate": 1.6544975536319158e-05, "loss": 0.1098, "step": 45900 }, { "epoch": 17.31, "learning_rate": 1.653744824990591e-05, "loss": 0.11, "step": 46000 }, { "epoch": 17.35, "learning_rate": 1.6529920963492663e-05, "loss": 0.1101, "step": 46100 }, { "epoch": 17.39, "learning_rate": 1.6522393677079412e-05, "loss": 0.1113, "step": 46200 }, { "epoch": 17.43, "learning_rate": 1.6514866390666168e-05, "loss": 0.112, "step": 46300 }, { "epoch": 17.46, "learning_rate": 1.6507339104252917e-05, "loss": 0.1109, "step": 46400 }, { "epoch": 17.5, "learning_rate": 1.649981181783967e-05, "loss": 0.1108, "step": 46500 }, { "epoch": 17.54, "learning_rate": 1.6492284531426422e-05, "loss": 0.1092, "step": 46600 }, { "epoch": 17.58, "learning_rate": 1.6484757245013175e-05, "loss": 0.1097, "step": 46700 }, { "epoch": 17.61, "learning_rate": 1.6477229958599924e-05, "loss": 0.1098, "step": 46800 }, { "epoch": 17.65, "learning_rate": 1.646970267218668e-05, "loss": 0.1081, "step": 46900 }, { "epoch": 17.69, "learning_rate": 1.646217538577343e-05, "loss": 0.1085, "step": 47000 }, { "epoch": 17.73, "learning_rate": 1.645464809936018e-05, "loss": 0.1089, "step": 47100 }, { "epoch": 17.76, "learning_rate": 1.6447120812946934e-05, "loss": 0.1084, "step": 47200 }, { "epoch": 17.8, "learning_rate": 1.6439593526533686e-05, "loss": 0.1093, "step": 47300 }, { "epoch": 17.84, "learning_rate": 1.643206624012044e-05, "loss": 0.1115, "step": 47400 }, { "epoch": 17.88, "learning_rate": 1.642453895370719e-05, "loss": 0.1091, "step": 47500 }, { "epoch": 17.91, "learning_rate": 1.6417011667293944e-05, "loss": 0.11, "step": 47600 }, { "epoch": 17.95, "learning_rate": 1.6409484380880693e-05, "loss": 0.1096, "step": 47700 }, { "epoch": 17.99, "learning_rate": 1.6401957094467446e-05, "loss": 0.1108, "step": 47800 }, { "epoch": 18.0, "eval_loss": 0.10891053080558777, "eval_runtime": 44.4735, "eval_samples_per_second": 168.64, "eval_steps_per_second": 10.546, "step": 47826 }, { "epoch": 18.03, "learning_rate": 1.6394429808054198e-05, "loss": 0.1076, "step": 47900 }, { "epoch": 18.07, "learning_rate": 1.638690252164095e-05, "loss": 0.1092, "step": 48000 }, { "epoch": 18.1, "learning_rate": 1.63793752352277e-05, "loss": 0.1081, "step": 48100 }, { "epoch": 18.14, "learning_rate": 1.6371847948814456e-05, "loss": 0.1089, "step": 48200 }, { "epoch": 18.18, "learning_rate": 1.6364320662401205e-05, "loss": 0.1097, "step": 48300 }, { "epoch": 18.22, "learning_rate": 1.6356793375987957e-05, "loss": 0.11, "step": 48400 }, { "epoch": 18.25, "learning_rate": 1.634926608957471e-05, "loss": 0.107, "step": 48500 }, { "epoch": 18.29, "learning_rate": 1.6341738803161462e-05, "loss": 0.109, "step": 48600 }, { "epoch": 18.33, "learning_rate": 1.633421151674821e-05, "loss": 0.1092, "step": 48700 }, { "epoch": 18.37, "learning_rate": 1.6326684230334967e-05, "loss": 0.109, "step": 48800 }, { "epoch": 18.4, "learning_rate": 1.6319156943921716e-05, "loss": 0.1068, "step": 48900 }, { "epoch": 18.44, "learning_rate": 1.631162965750847e-05, "loss": 0.1091, "step": 49000 }, { "epoch": 18.48, "learning_rate": 1.630410237109522e-05, "loss": 0.1087, "step": 49100 }, { "epoch": 18.52, "learning_rate": 1.6296575084681974e-05, "loss": 0.1078, "step": 49200 }, { "epoch": 18.55, "learning_rate": 1.6289047798268723e-05, "loss": 0.109, "step": 49300 }, { "epoch": 18.59, "learning_rate": 1.628152051185548e-05, "loss": 0.1112, "step": 49400 }, { "epoch": 18.63, "learning_rate": 1.6273993225442228e-05, "loss": 0.1098, "step": 49500 }, { "epoch": 18.67, "learning_rate": 1.626646593902898e-05, "loss": 0.11, "step": 49600 }, { "epoch": 18.71, "learning_rate": 1.6258938652615733e-05, "loss": 0.1085, "step": 49700 }, { "epoch": 18.74, "learning_rate": 1.6251411366202486e-05, "loss": 0.1088, "step": 49800 }, { "epoch": 18.78, "learning_rate": 1.6243884079789238e-05, "loss": 0.1093, "step": 49900 }, { "epoch": 18.82, "learning_rate": 1.623635679337599e-05, "loss": 0.1069, "step": 50000 }, { "epoch": 18.86, "learning_rate": 1.6228829506962743e-05, "loss": 0.1082, "step": 50100 }, { "epoch": 18.89, "learning_rate": 1.6221302220549492e-05, "loss": 0.1093, "step": 50200 }, { "epoch": 18.93, "learning_rate": 1.6213774934136245e-05, "loss": 0.1074, "step": 50300 }, { "epoch": 18.97, "learning_rate": 1.6206247647722997e-05, "loss": 0.1061, "step": 50400 }, { "epoch": 19.0, "eval_loss": 0.10703522711992264, "eval_runtime": 45.2943, "eval_samples_per_second": 165.584, "eval_steps_per_second": 10.354, "step": 50483 }, { "epoch": 19.01, "learning_rate": 1.619872036130975e-05, "loss": 0.1082, "step": 50500 }, { "epoch": 19.04, "learning_rate": 1.6191193074896502e-05, "loss": 0.1093, "step": 50600 }, { "epoch": 19.08, "learning_rate": 1.6183665788483255e-05, "loss": 0.1078, "step": 50700 }, { "epoch": 19.12, "learning_rate": 1.6176138502070004e-05, "loss": 0.1069, "step": 50800 }, { "epoch": 19.16, "learning_rate": 1.6168611215656757e-05, "loss": 0.1092, "step": 50900 }, { "epoch": 19.19, "learning_rate": 1.616108392924351e-05, "loss": 0.1064, "step": 51000 }, { "epoch": 19.23, "learning_rate": 1.615355664283026e-05, "loss": 0.1063, "step": 51100 }, { "epoch": 19.27, "learning_rate": 1.614602935641701e-05, "loss": 0.1071, "step": 51200 }, { "epoch": 19.31, "learning_rate": 1.6138502070003767e-05, "loss": 0.1083, "step": 51300 }, { "epoch": 19.35, "learning_rate": 1.6130974783590516e-05, "loss": 0.1079, "step": 51400 }, { "epoch": 19.38, "learning_rate": 1.6123447497177268e-05, "loss": 0.1081, "step": 51500 }, { "epoch": 19.42, "learning_rate": 1.611592021076402e-05, "loss": 0.1086, "step": 51600 }, { "epoch": 19.46, "learning_rate": 1.6108392924350773e-05, "loss": 0.1073, "step": 51700 }, { "epoch": 19.5, "learning_rate": 1.6100865637937522e-05, "loss": 0.1082, "step": 51800 }, { "epoch": 19.53, "learning_rate": 1.6093338351524278e-05, "loss": 0.1081, "step": 51900 }, { "epoch": 19.57, "learning_rate": 1.6085811065111027e-05, "loss": 0.1081, "step": 52000 }, { "epoch": 19.61, "learning_rate": 1.607828377869778e-05, "loss": 0.1077, "step": 52100 }, { "epoch": 19.65, "learning_rate": 1.6070756492284532e-05, "loss": 0.108, "step": 52200 }, { "epoch": 19.68, "learning_rate": 1.6063229205871285e-05, "loss": 0.107, "step": 52300 }, { "epoch": 19.72, "learning_rate": 1.6055701919458037e-05, "loss": 0.1082, "step": 52400 }, { "epoch": 19.76, "learning_rate": 1.604817463304479e-05, "loss": 0.1082, "step": 52500 }, { "epoch": 19.8, "learning_rate": 1.6040647346631542e-05, "loss": 0.1067, "step": 52600 }, { "epoch": 19.83, "learning_rate": 1.603312006021829e-05, "loss": 0.1071, "step": 52700 }, { "epoch": 19.87, "learning_rate": 1.6025592773805044e-05, "loss": 0.1068, "step": 52800 }, { "epoch": 19.91, "learning_rate": 1.6018065487391797e-05, "loss": 0.1086, "step": 52900 }, { "epoch": 19.95, "learning_rate": 1.601053820097855e-05, "loss": 0.1085, "step": 53000 }, { "epoch": 19.98, "learning_rate": 1.60030109145653e-05, "loss": 0.1073, "step": 53100 }, { "epoch": 20.0, "eval_loss": 0.10757213830947876, "eval_runtime": 45.7115, "eval_samples_per_second": 164.072, "eval_steps_per_second": 10.26, "step": 53140 }, { "epoch": 20.02, "learning_rate": 1.5995483628152054e-05, "loss": 0.1084, "step": 53200 }, { "epoch": 20.06, "learning_rate": 1.5987956341738803e-05, "loss": 0.1089, "step": 53300 }, { "epoch": 20.1, "learning_rate": 1.5980429055325556e-05, "loss": 0.1085, "step": 53400 }, { "epoch": 20.14, "learning_rate": 1.597290176891231e-05, "loss": 0.1092, "step": 53500 }, { "epoch": 20.17, "learning_rate": 1.596537448249906e-05, "loss": 0.1073, "step": 53600 }, { "epoch": 20.21, "learning_rate": 1.5957847196085813e-05, "loss": 0.1087, "step": 53700 }, { "epoch": 20.25, "learning_rate": 1.5950319909672566e-05, "loss": 0.1071, "step": 53800 }, { "epoch": 20.29, "learning_rate": 1.5942792623259315e-05, "loss": 0.1061, "step": 53900 }, { "epoch": 20.32, "learning_rate": 1.5935265336846068e-05, "loss": 0.1055, "step": 54000 }, { "epoch": 20.36, "learning_rate": 1.592773805043282e-05, "loss": 0.1077, "step": 54100 }, { "epoch": 20.4, "learning_rate": 1.5920210764019573e-05, "loss": 0.108, "step": 54200 }, { "epoch": 20.44, "learning_rate": 1.591268347760632e-05, "loss": 0.1075, "step": 54300 }, { "epoch": 20.47, "learning_rate": 1.5905156191193078e-05, "loss": 0.1066, "step": 54400 }, { "epoch": 20.51, "learning_rate": 1.5897628904779827e-05, "loss": 0.1055, "step": 54500 }, { "epoch": 20.55, "learning_rate": 1.589010161836658e-05, "loss": 0.1069, "step": 54600 }, { "epoch": 20.59, "learning_rate": 1.5882574331953332e-05, "loss": 0.1068, "step": 54700 }, { "epoch": 20.62, "learning_rate": 1.5875047045540084e-05, "loss": 0.1065, "step": 54800 }, { "epoch": 20.66, "learning_rate": 1.5867519759126837e-05, "loss": 0.1053, "step": 54900 }, { "epoch": 20.7, "learning_rate": 1.585999247271359e-05, "loss": 0.1055, "step": 55000 }, { "epoch": 20.74, "learning_rate": 1.5852465186300342e-05, "loss": 0.107, "step": 55100 }, { "epoch": 20.78, "learning_rate": 1.584493789988709e-05, "loss": 0.1058, "step": 55200 }, { "epoch": 20.81, "learning_rate": 1.5837410613473843e-05, "loss": 0.1088, "step": 55300 }, { "epoch": 20.85, "learning_rate": 1.5829883327060596e-05, "loss": 0.1061, "step": 55400 }, { "epoch": 20.89, "learning_rate": 1.582235604064735e-05, "loss": 0.1066, "step": 55500 }, { "epoch": 20.93, "learning_rate": 1.58148287542341e-05, "loss": 0.1071, "step": 55600 }, { "epoch": 20.96, "learning_rate": 1.5807301467820853e-05, "loss": 0.1066, "step": 55700 }, { "epoch": 21.0, "eval_loss": 0.1060996800661087, "eval_runtime": 45.0415, "eval_samples_per_second": 166.513, "eval_steps_per_second": 10.413, "step": 55797 }, { "epoch": 21.0, "learning_rate": 1.5799774181407603e-05, "loss": 0.1054, "step": 55800 }, { "epoch": 21.04, "learning_rate": 1.5792246894994355e-05, "loss": 0.1053, "step": 55900 }, { "epoch": 21.08, "learning_rate": 1.5784719608581108e-05, "loss": 0.1055, "step": 56000 }, { "epoch": 21.11, "learning_rate": 1.577719232216786e-05, "loss": 0.1056, "step": 56100 }, { "epoch": 21.15, "learning_rate": 1.5769665035754613e-05, "loss": 0.106, "step": 56200 }, { "epoch": 21.19, "learning_rate": 1.5762137749341365e-05, "loss": 0.106, "step": 56300 }, { "epoch": 21.23, "learning_rate": 1.5754610462928114e-05, "loss": 0.1056, "step": 56400 }, { "epoch": 21.26, "learning_rate": 1.5747083176514867e-05, "loss": 0.1069, "step": 56500 }, { "epoch": 21.3, "learning_rate": 1.573955589010162e-05, "loss": 0.1054, "step": 56600 }, { "epoch": 21.34, "learning_rate": 1.5732028603688372e-05, "loss": 0.1087, "step": 56700 }, { "epoch": 21.38, "learning_rate": 1.5724501317275124e-05, "loss": 0.1085, "step": 56800 }, { "epoch": 21.42, "learning_rate": 1.5716974030861877e-05, "loss": 0.1052, "step": 56900 }, { "epoch": 21.45, "learning_rate": 1.5709446744448626e-05, "loss": 0.1072, "step": 57000 }, { "epoch": 21.49, "learning_rate": 1.570191945803538e-05, "loss": 0.1055, "step": 57100 }, { "epoch": 21.53, "learning_rate": 1.569439217162213e-05, "loss": 0.1067, "step": 57200 }, { "epoch": 21.57, "learning_rate": 1.5686864885208884e-05, "loss": 0.1057, "step": 57300 }, { "epoch": 21.6, "learning_rate": 1.5679337598795636e-05, "loss": 0.1057, "step": 57400 }, { "epoch": 21.64, "learning_rate": 1.567181031238239e-05, "loss": 0.106, "step": 57500 }, { "epoch": 21.68, "learning_rate": 1.566428302596914e-05, "loss": 0.1022, "step": 57600 }, { "epoch": 21.72, "learning_rate": 1.565675573955589e-05, "loss": 0.1066, "step": 57700 }, { "epoch": 21.75, "learning_rate": 1.5649228453142643e-05, "loss": 0.1063, "step": 57800 }, { "epoch": 21.79, "learning_rate": 1.5641701166729395e-05, "loss": 0.1072, "step": 57900 }, { "epoch": 21.83, "learning_rate": 1.5634173880316148e-05, "loss": 0.1044, "step": 58000 }, { "epoch": 21.87, "learning_rate": 1.56266465939029e-05, "loss": 0.1071, "step": 58100 }, { "epoch": 21.9, "learning_rate": 1.5619119307489653e-05, "loss": 0.1058, "step": 58200 }, { "epoch": 21.94, "learning_rate": 1.5611592021076402e-05, "loss": 0.1072, "step": 58300 }, { "epoch": 21.98, "learning_rate": 1.5604064734663154e-05, "loss": 0.1065, "step": 58400 }, { "epoch": 22.0, "eval_loss": 0.10562047362327576, "eval_runtime": 44.6592, "eval_samples_per_second": 167.938, "eval_steps_per_second": 10.502, "step": 58454 }, { "epoch": 22.02, "learning_rate": 1.5596537448249907e-05, "loss": 0.1042, "step": 58500 }, { "epoch": 22.05, "learning_rate": 1.558901016183666e-05, "loss": 0.1051, "step": 58600 }, { "epoch": 22.09, "learning_rate": 1.5581482875423412e-05, "loss": 0.1075, "step": 58700 }, { "epoch": 22.13, "learning_rate": 1.5573955589010164e-05, "loss": 0.1052, "step": 58800 }, { "epoch": 22.17, "learning_rate": 1.5566428302596914e-05, "loss": 0.1039, "step": 58900 }, { "epoch": 22.21, "learning_rate": 1.5558901016183666e-05, "loss": 0.1063, "step": 59000 }, { "epoch": 22.24, "learning_rate": 1.555137372977042e-05, "loss": 0.1048, "step": 59100 }, { "epoch": 22.28, "learning_rate": 1.554384644335717e-05, "loss": 0.1056, "step": 59200 }, { "epoch": 22.32, "learning_rate": 1.5536319156943924e-05, "loss": 0.1064, "step": 59300 }, { "epoch": 22.36, "learning_rate": 1.5528791870530676e-05, "loss": 0.1055, "step": 59400 }, { "epoch": 22.39, "learning_rate": 1.5521264584117425e-05, "loss": 0.1065, "step": 59500 }, { "epoch": 22.43, "learning_rate": 1.5513737297704178e-05, "loss": 0.1063, "step": 59600 }, { "epoch": 22.47, "learning_rate": 1.550621001129093e-05, "loss": 0.1045, "step": 59700 }, { "epoch": 22.51, "learning_rate": 1.5498682724877683e-05, "loss": 0.1041, "step": 59800 }, { "epoch": 22.54, "learning_rate": 1.5491155438464435e-05, "loss": 0.1057, "step": 59900 }, { "epoch": 22.58, "learning_rate": 1.5483628152051188e-05, "loss": 0.1048, "step": 60000 }, { "epoch": 22.62, "learning_rate": 1.547610086563794e-05, "loss": 0.1055, "step": 60100 }, { "epoch": 22.66, "learning_rate": 1.546857357922469e-05, "loss": 0.1021, "step": 60200 }, { "epoch": 22.69, "learning_rate": 1.5461046292811442e-05, "loss": 0.1044, "step": 60300 }, { "epoch": 22.73, "learning_rate": 1.5453519006398194e-05, "loss": 0.1056, "step": 60400 }, { "epoch": 22.77, "learning_rate": 1.5445991719984947e-05, "loss": 0.1057, "step": 60500 }, { "epoch": 22.81, "learning_rate": 1.54384644335717e-05, "loss": 0.1058, "step": 60600 }, { "epoch": 22.85, "learning_rate": 1.5430937147158452e-05, "loss": 0.1059, "step": 60700 }, { "epoch": 22.88, "learning_rate": 1.54234098607452e-05, "loss": 0.1058, "step": 60800 }, { "epoch": 22.92, "learning_rate": 1.5415882574331954e-05, "loss": 0.1052, "step": 60900 }, { "epoch": 22.96, "learning_rate": 1.5408355287918706e-05, "loss": 0.1062, "step": 61000 }, { "epoch": 23.0, "learning_rate": 1.540082800150546e-05, "loss": 0.1045, "step": 61100 }, { "epoch": 23.0, "eval_loss": 0.10369115322828293, "eval_runtime": 44.6965, "eval_samples_per_second": 167.798, "eval_steps_per_second": 10.493, "step": 61111 }, { "epoch": 23.03, "learning_rate": 1.539330071509221e-05, "loss": 0.1056, "step": 61200 }, { "epoch": 23.07, "learning_rate": 1.5385773428678964e-05, "loss": 0.1034, "step": 61300 }, { "epoch": 23.11, "learning_rate": 1.5378246142265713e-05, "loss": 0.1056, "step": 61400 }, { "epoch": 23.15, "learning_rate": 1.5370718855852465e-05, "loss": 0.1055, "step": 61500 }, { "epoch": 23.18, "learning_rate": 1.5363191569439218e-05, "loss": 0.1053, "step": 61600 }, { "epoch": 23.22, "learning_rate": 1.535566428302597e-05, "loss": 0.1049, "step": 61700 }, { "epoch": 23.26, "learning_rate": 1.5348136996612723e-05, "loss": 0.1062, "step": 61800 }, { "epoch": 23.3, "learning_rate": 1.5340609710199475e-05, "loss": 0.1038, "step": 61900 }, { "epoch": 23.33, "learning_rate": 1.5333082423786225e-05, "loss": 0.1048, "step": 62000 }, { "epoch": 23.37, "learning_rate": 1.5325555137372977e-05, "loss": 0.1054, "step": 62100 }, { "epoch": 23.41, "learning_rate": 1.5318027850959733e-05, "loss": 0.1041, "step": 62200 }, { "epoch": 23.45, "learning_rate": 1.5310500564546482e-05, "loss": 0.1048, "step": 62300 }, { "epoch": 23.49, "learning_rate": 1.5302973278133235e-05, "loss": 0.105, "step": 62400 }, { "epoch": 23.52, "learning_rate": 1.5295445991719987e-05, "loss": 0.1023, "step": 62500 }, { "epoch": 23.56, "learning_rate": 1.528791870530674e-05, "loss": 0.105, "step": 62600 }, { "epoch": 23.6, "learning_rate": 1.528039141889349e-05, "loss": 0.1046, "step": 62700 }, { "epoch": 23.64, "learning_rate": 1.527286413248024e-05, "loss": 0.1033, "step": 62800 }, { "epoch": 23.67, "learning_rate": 1.5265336846066994e-05, "loss": 0.1057, "step": 62900 }, { "epoch": 23.71, "learning_rate": 1.5257809559653746e-05, "loss": 0.1056, "step": 63000 }, { "epoch": 23.75, "learning_rate": 1.5250282273240497e-05, "loss": 0.1056, "step": 63100 }, { "epoch": 23.79, "learning_rate": 1.5242754986827251e-05, "loss": 0.1032, "step": 63200 }, { "epoch": 23.82, "learning_rate": 1.5235227700414002e-05, "loss": 0.1025, "step": 63300 }, { "epoch": 23.86, "learning_rate": 1.5227700414000755e-05, "loss": 0.1063, "step": 63400 }, { "epoch": 23.9, "learning_rate": 1.5220173127587505e-05, "loss": 0.1035, "step": 63500 }, { "epoch": 23.94, "learning_rate": 1.5212645841174258e-05, "loss": 0.104, "step": 63600 }, { "epoch": 23.97, "learning_rate": 1.5205118554761009e-05, "loss": 0.1052, "step": 63700 }, { "epoch": 24.0, "eval_loss": 0.10549741983413696, "eval_runtime": 43.683, "eval_samples_per_second": 171.691, "eval_steps_per_second": 10.736, "step": 63768 }, { "epoch": 24.01, "learning_rate": 1.5197591268347763e-05, "loss": 0.1051, "step": 63800 }, { "epoch": 24.05, "learning_rate": 1.5190063981934514e-05, "loss": 0.1031, "step": 63900 }, { "epoch": 24.09, "learning_rate": 1.5182536695521266e-05, "loss": 0.1057, "step": 64000 }, { "epoch": 24.12, "learning_rate": 1.5175009409108017e-05, "loss": 0.1048, "step": 64100 }, { "epoch": 24.16, "learning_rate": 1.516748212269477e-05, "loss": 0.1054, "step": 64200 }, { "epoch": 24.2, "learning_rate": 1.515995483628152e-05, "loss": 0.1042, "step": 64300 }, { "epoch": 24.24, "learning_rate": 1.5152427549868273e-05, "loss": 0.1049, "step": 64400 }, { "epoch": 24.28, "learning_rate": 1.5144900263455024e-05, "loss": 0.1039, "step": 64500 }, { "epoch": 24.31, "learning_rate": 1.5137372977041778e-05, "loss": 0.1039, "step": 64600 }, { "epoch": 24.35, "learning_rate": 1.512984569062853e-05, "loss": 0.104, "step": 64700 }, { "epoch": 24.39, "learning_rate": 1.5122318404215281e-05, "loss": 0.1039, "step": 64800 }, { "epoch": 24.43, "learning_rate": 1.5114791117802034e-05, "loss": 0.1031, "step": 64900 }, { "epoch": 24.46, "learning_rate": 1.5107263831388785e-05, "loss": 0.1019, "step": 65000 }, { "epoch": 24.5, "learning_rate": 1.5099736544975539e-05, "loss": 0.1041, "step": 65100 }, { "epoch": 24.54, "learning_rate": 1.509220925856229e-05, "loss": 0.1049, "step": 65200 }, { "epoch": 24.58, "learning_rate": 1.5084681972149042e-05, "loss": 0.1029, "step": 65300 }, { "epoch": 24.61, "learning_rate": 1.5077154685735793e-05, "loss": 0.105, "step": 65400 }, { "epoch": 24.65, "learning_rate": 1.5069627399322546e-05, "loss": 0.1041, "step": 65500 }, { "epoch": 24.69, "learning_rate": 1.5062100112909296e-05, "loss": 0.1032, "step": 65600 }, { "epoch": 24.73, "learning_rate": 1.505457282649605e-05, "loss": 0.1033, "step": 65700 }, { "epoch": 24.76, "learning_rate": 1.5047045540082801e-05, "loss": 0.1036, "step": 65800 }, { "epoch": 24.8, "learning_rate": 1.5039518253669554e-05, "loss": 0.1031, "step": 65900 }, { "epoch": 24.84, "learning_rate": 1.5031990967256305e-05, "loss": 0.1031, "step": 66000 }, { "epoch": 24.88, "learning_rate": 1.5024463680843057e-05, "loss": 0.103, "step": 66100 }, { "epoch": 24.92, "learning_rate": 1.5016936394429808e-05, "loss": 0.103, "step": 66200 }, { "epoch": 24.95, "learning_rate": 1.5009409108016562e-05, "loss": 0.103, "step": 66300 }, { "epoch": 24.99, "learning_rate": 1.5001881821603313e-05, "loss": 0.102, "step": 66400 }, { "epoch": 25.0, "eval_loss": 0.10278935730457306, "eval_runtime": 43.9205, "eval_samples_per_second": 170.763, "eval_steps_per_second": 10.678, "step": 66425 }, { "epoch": 25.03, "learning_rate": 1.4994354535190066e-05, "loss": 0.1033, "step": 66500 }, { "epoch": 25.07, "learning_rate": 1.4986827248776816e-05, "loss": 0.1022, "step": 66600 }, { "epoch": 25.1, "learning_rate": 1.4979299962363569e-05, "loss": 0.1028, "step": 66700 }, { "epoch": 25.14, "learning_rate": 1.497177267595032e-05, "loss": 0.1044, "step": 66800 }, { "epoch": 25.18, "learning_rate": 1.4964245389537074e-05, "loss": 0.1038, "step": 66900 }, { "epoch": 25.22, "learning_rate": 1.4956718103123825e-05, "loss": 0.1052, "step": 67000 }, { "epoch": 25.25, "learning_rate": 1.4949190816710577e-05, "loss": 0.1039, "step": 67100 }, { "epoch": 25.29, "learning_rate": 1.494166353029733e-05, "loss": 0.1039, "step": 67200 }, { "epoch": 25.33, "learning_rate": 1.493413624388408e-05, "loss": 0.1025, "step": 67300 }, { "epoch": 25.37, "learning_rate": 1.4926608957470833e-05, "loss": 0.1037, "step": 67400 }, { "epoch": 25.4, "learning_rate": 1.4919081671057584e-05, "loss": 0.1019, "step": 67500 }, { "epoch": 25.44, "learning_rate": 1.4911554384644338e-05, "loss": 0.1027, "step": 67600 }, { "epoch": 25.48, "learning_rate": 1.4904027098231089e-05, "loss": 0.1037, "step": 67700 }, { "epoch": 25.52, "learning_rate": 1.4896499811817842e-05, "loss": 0.1031, "step": 67800 }, { "epoch": 25.56, "learning_rate": 1.4888972525404592e-05, "loss": 0.1035, "step": 67900 }, { "epoch": 25.59, "learning_rate": 1.4881445238991345e-05, "loss": 0.1031, "step": 68000 }, { "epoch": 25.63, "learning_rate": 1.4873917952578096e-05, "loss": 0.1034, "step": 68100 }, { "epoch": 25.67, "learning_rate": 1.486639066616485e-05, "loss": 0.1037, "step": 68200 }, { "epoch": 25.71, "learning_rate": 1.48588633797516e-05, "loss": 0.104, "step": 68300 }, { "epoch": 25.74, "learning_rate": 1.4851336093338353e-05, "loss": 0.1036, "step": 68400 }, { "epoch": 25.78, "learning_rate": 1.4843808806925104e-05, "loss": 0.1031, "step": 68500 }, { "epoch": 25.82, "learning_rate": 1.4836281520511857e-05, "loss": 0.1027, "step": 68600 }, { "epoch": 25.86, "learning_rate": 1.4828754234098607e-05, "loss": 0.1036, "step": 68700 }, { "epoch": 25.89, "learning_rate": 1.4821226947685362e-05, "loss": 0.1023, "step": 68800 }, { "epoch": 25.93, "learning_rate": 1.4813699661272112e-05, "loss": 0.1015, "step": 68900 }, { "epoch": 25.97, "learning_rate": 1.4806172374858865e-05, "loss": 0.1025, "step": 69000 }, { "epoch": 26.0, "eval_loss": 0.10342206060886383, "eval_runtime": 45.4198, "eval_samples_per_second": 165.126, "eval_steps_per_second": 10.326, "step": 69082 }, { "epoch": 26.01, "learning_rate": 1.4798645088445616e-05, "loss": 0.1011, "step": 69100 }, { "epoch": 26.04, "learning_rate": 1.4791117802032368e-05, "loss": 0.1021, "step": 69200 }, { "epoch": 26.08, "learning_rate": 1.4783590515619119e-05, "loss": 0.1036, "step": 69300 }, { "epoch": 26.12, "learning_rate": 1.4776063229205873e-05, "loss": 0.1041, "step": 69400 }, { "epoch": 26.16, "learning_rate": 1.4768535942792624e-05, "loss": 0.1028, "step": 69500 }, { "epoch": 26.19, "learning_rate": 1.4761008656379377e-05, "loss": 0.1024, "step": 69600 }, { "epoch": 26.23, "learning_rate": 1.4753481369966129e-05, "loss": 0.1019, "step": 69700 }, { "epoch": 26.27, "learning_rate": 1.474595408355288e-05, "loss": 0.1022, "step": 69800 }, { "epoch": 26.31, "learning_rate": 1.4738426797139634e-05, "loss": 0.102, "step": 69900 }, { "epoch": 26.35, "learning_rate": 1.4730899510726385e-05, "loss": 0.1008, "step": 70000 }, { "epoch": 26.38, "learning_rate": 1.4723372224313137e-05, "loss": 0.1032, "step": 70100 }, { "epoch": 26.42, "learning_rate": 1.4715844937899888e-05, "loss": 0.1036, "step": 70200 }, { "epoch": 26.46, "learning_rate": 1.470831765148664e-05, "loss": 0.103, "step": 70300 }, { "epoch": 26.5, "learning_rate": 1.4700790365073392e-05, "loss": 0.1016, "step": 70400 }, { "epoch": 26.53, "learning_rate": 1.4693263078660144e-05, "loss": 0.1022, "step": 70500 }, { "epoch": 26.57, "learning_rate": 1.4685735792246895e-05, "loss": 0.1033, "step": 70600 }, { "epoch": 26.61, "learning_rate": 1.4678208505833649e-05, "loss": 0.1032, "step": 70700 }, { "epoch": 26.65, "learning_rate": 1.46706812194204e-05, "loss": 0.1026, "step": 70800 }, { "epoch": 26.68, "learning_rate": 1.4663153933007152e-05, "loss": 0.1019, "step": 70900 }, { "epoch": 26.72, "learning_rate": 1.4655626646593903e-05, "loss": 0.1035, "step": 71000 }, { "epoch": 26.76, "learning_rate": 1.4648099360180656e-05, "loss": 0.102, "step": 71100 }, { "epoch": 26.8, "learning_rate": 1.4640572073767407e-05, "loss": 0.1026, "step": 71200 }, { "epoch": 26.83, "learning_rate": 1.463304478735416e-05, "loss": 0.1023, "step": 71300 }, { "epoch": 26.87, "learning_rate": 1.4625517500940912e-05, "loss": 0.1011, "step": 71400 }, { "epoch": 26.91, "learning_rate": 1.4617990214527664e-05, "loss": 0.1037, "step": 71500 }, { "epoch": 26.95, "learning_rate": 1.4610462928114415e-05, "loss": 0.1036, "step": 71600 }, { "epoch": 26.99, "learning_rate": 1.4602935641701168e-05, "loss": 0.1037, "step": 71700 }, { "epoch": 27.0, "eval_loss": 0.10246068239212036, "eval_runtime": 45.3187, "eval_samples_per_second": 165.495, "eval_steps_per_second": 10.349, "step": 71739 }, { "epoch": 27.02, "learning_rate": 1.4595408355287918e-05, "loss": 0.1032, "step": 71800 }, { "epoch": 27.06, "learning_rate": 1.4587881068874673e-05, "loss": 0.1021, "step": 71900 }, { "epoch": 27.1, "learning_rate": 1.4580353782461423e-05, "loss": 0.1034, "step": 72000 }, { "epoch": 27.14, "learning_rate": 1.4572826496048176e-05, "loss": 0.1029, "step": 72100 }, { "epoch": 27.17, "learning_rate": 1.4565299209634928e-05, "loss": 0.1024, "step": 72200 }, { "epoch": 27.21, "learning_rate": 1.455777192322168e-05, "loss": 0.1028, "step": 72300 }, { "epoch": 27.25, "learning_rate": 1.4550244636808433e-05, "loss": 0.1053, "step": 72400 }, { "epoch": 27.29, "learning_rate": 1.4542717350395184e-05, "loss": 0.1013, "step": 72500 }, { "epoch": 27.32, "learning_rate": 1.4535190063981937e-05, "loss": 0.1012, "step": 72600 }, { "epoch": 27.36, "learning_rate": 1.4527662777568688e-05, "loss": 0.1024, "step": 72700 }, { "epoch": 27.4, "learning_rate": 1.452013549115544e-05, "loss": 0.1003, "step": 72800 }, { "epoch": 27.44, "learning_rate": 1.4512608204742191e-05, "loss": 0.103, "step": 72900 }, { "epoch": 27.47, "learning_rate": 1.4505080918328945e-05, "loss": 0.1012, "step": 73000 }, { "epoch": 27.51, "learning_rate": 1.4497553631915696e-05, "loss": 0.1015, "step": 73100 }, { "epoch": 27.55, "learning_rate": 1.4490026345502448e-05, "loss": 0.1032, "step": 73200 }, { "epoch": 27.59, "learning_rate": 1.44824990590892e-05, "loss": 0.1038, "step": 73300 }, { "epoch": 27.63, "learning_rate": 1.4474971772675952e-05, "loss": 0.0996, "step": 73400 }, { "epoch": 27.66, "learning_rate": 1.4467444486262703e-05, "loss": 0.1024, "step": 73500 }, { "epoch": 27.7, "learning_rate": 1.4459917199849455e-05, "loss": 0.1019, "step": 73600 }, { "epoch": 27.74, "learning_rate": 1.4452389913436206e-05, "loss": 0.1024, "step": 73700 }, { "epoch": 27.78, "learning_rate": 1.444486262702296e-05, "loss": 0.1006, "step": 73800 }, { "epoch": 27.81, "learning_rate": 1.4437335340609711e-05, "loss": 0.1014, "step": 73900 }, { "epoch": 27.85, "learning_rate": 1.4429808054196463e-05, "loss": 0.1024, "step": 74000 }, { "epoch": 27.89, "learning_rate": 1.4422280767783214e-05, "loss": 0.1023, "step": 74100 }, { "epoch": 27.93, "learning_rate": 1.4414753481369967e-05, "loss": 0.1034, "step": 74200 }, { "epoch": 27.96, "learning_rate": 1.4407226194956718e-05, "loss": 0.1022, "step": 74300 }, { "epoch": 28.0, "eval_loss": 0.10144173353910446, "eval_runtime": 45.583, "eval_samples_per_second": 164.535, "eval_steps_per_second": 10.289, "step": 74396 }, { "epoch": 28.0, "learning_rate": 1.4399698908543472e-05, "loss": 0.1023, "step": 74400 }, { "epoch": 28.04, "learning_rate": 1.4392171622130224e-05, "loss": 0.1017, "step": 74500 }, { "epoch": 28.08, "learning_rate": 1.4384644335716975e-05, "loss": 0.1017, "step": 74600 }, { "epoch": 28.11, "learning_rate": 1.4377117049303728e-05, "loss": 0.1008, "step": 74700 }, { "epoch": 28.15, "learning_rate": 1.4369589762890478e-05, "loss": 0.1023, "step": 74800 }, { "epoch": 28.19, "learning_rate": 1.4362062476477233e-05, "loss": 0.1022, "step": 74900 }, { "epoch": 28.23, "learning_rate": 1.4354535190063984e-05, "loss": 0.1017, "step": 75000 }, { "epoch": 28.26, "learning_rate": 1.4347007903650736e-05, "loss": 0.1012, "step": 75100 }, { "epoch": 28.3, "learning_rate": 1.4339480617237487e-05, "loss": 0.103, "step": 75200 }, { "epoch": 28.34, "learning_rate": 1.433195333082424e-05, "loss": 0.1025, "step": 75300 }, { "epoch": 28.38, "learning_rate": 1.432442604441099e-05, "loss": 0.1013, "step": 75400 }, { "epoch": 28.42, "learning_rate": 1.4316898757997744e-05, "loss": 0.1021, "step": 75500 }, { "epoch": 28.45, "learning_rate": 1.4309371471584495e-05, "loss": 0.1026, "step": 75600 }, { "epoch": 28.49, "learning_rate": 1.4301844185171248e-05, "loss": 0.1027, "step": 75700 }, { "epoch": 28.53, "learning_rate": 1.4294316898757999e-05, "loss": 0.1014, "step": 75800 }, { "epoch": 28.57, "learning_rate": 1.4286789612344751e-05, "loss": 0.1013, "step": 75900 }, { "epoch": 28.6, "learning_rate": 1.4279262325931502e-05, "loss": 0.1018, "step": 76000 }, { "epoch": 28.64, "learning_rate": 1.4271735039518254e-05, "loss": 0.102, "step": 76100 }, { "epoch": 28.68, "learning_rate": 1.4264207753105005e-05, "loss": 0.1013, "step": 76200 }, { "epoch": 28.72, "learning_rate": 1.425668046669176e-05, "loss": 0.1027, "step": 76300 }, { "epoch": 28.75, "learning_rate": 1.424915318027851e-05, "loss": 0.1004, "step": 76400 }, { "epoch": 28.79, "learning_rate": 1.4241625893865263e-05, "loss": 0.1013, "step": 76500 }, { "epoch": 28.83, "learning_rate": 1.4234098607452014e-05, "loss": 0.1021, "step": 76600 }, { "epoch": 28.87, "learning_rate": 1.4226571321038766e-05, "loss": 0.1009, "step": 76700 }, { "epoch": 28.9, "learning_rate": 1.4219044034625517e-05, "loss": 0.1012, "step": 76800 }, { "epoch": 28.94, "learning_rate": 1.4211516748212271e-05, "loss": 0.1015, "step": 76900 }, { "epoch": 28.98, "learning_rate": 1.4203989461799024e-05, "loss": 0.1026, "step": 77000 }, { "epoch": 29.0, "eval_loss": 0.10109123587608337, "eval_runtime": 45.9148, "eval_samples_per_second": 163.346, "eval_steps_per_second": 10.215, "step": 77053 }, { "epoch": 29.02, "learning_rate": 1.4196462175385774e-05, "loss": 0.102, "step": 77100 }, { "epoch": 29.06, "learning_rate": 1.4188934888972527e-05, "loss": 0.1028, "step": 77200 }, { "epoch": 29.09, "learning_rate": 1.4181407602559278e-05, "loss": 0.1016, "step": 77300 }, { "epoch": 29.13, "learning_rate": 1.4173880316146032e-05, "loss": 0.1021, "step": 77400 }, { "epoch": 29.17, "learning_rate": 1.4166353029732783e-05, "loss": 0.1018, "step": 77500 }, { "epoch": 29.21, "learning_rate": 1.4158825743319535e-05, "loss": 0.1004, "step": 77600 }, { "epoch": 29.24, "learning_rate": 1.4151298456906286e-05, "loss": 0.102, "step": 77700 }, { "epoch": 29.28, "learning_rate": 1.4143771170493039e-05, "loss": 0.1013, "step": 77800 }, { "epoch": 29.32, "learning_rate": 1.413624388407979e-05, "loss": 0.1014, "step": 77900 }, { "epoch": 29.36, "learning_rate": 1.4128716597666544e-05, "loss": 0.1003, "step": 78000 }, { "epoch": 29.39, "learning_rate": 1.4121189311253294e-05, "loss": 0.1009, "step": 78100 }, { "epoch": 29.43, "learning_rate": 1.4113662024840047e-05, "loss": 0.1008, "step": 78200 }, { "epoch": 29.47, "learning_rate": 1.4106134738426798e-05, "loss": 0.1015, "step": 78300 }, { "epoch": 29.51, "learning_rate": 1.409860745201355e-05, "loss": 0.1019, "step": 78400 }, { "epoch": 29.54, "learning_rate": 1.4091080165600301e-05, "loss": 0.1014, "step": 78500 }, { "epoch": 29.58, "learning_rate": 1.4083552879187055e-05, "loss": 0.1009, "step": 78600 }, { "epoch": 29.62, "learning_rate": 1.4076025592773806e-05, "loss": 0.1013, "step": 78700 }, { "epoch": 29.66, "learning_rate": 1.4068498306360559e-05, "loss": 0.1018, "step": 78800 }, { "epoch": 29.7, "learning_rate": 1.406097101994731e-05, "loss": 0.1026, "step": 78900 }, { "epoch": 29.73, "learning_rate": 1.4053443733534062e-05, "loss": 0.1005, "step": 79000 }, { "epoch": 29.77, "learning_rate": 1.4045916447120813e-05, "loss": 0.1009, "step": 79100 }, { "epoch": 29.81, "learning_rate": 1.4038389160707565e-05, "loss": 0.1001, "step": 79200 }, { "epoch": 29.85, "learning_rate": 1.4030861874294316e-05, "loss": 0.102, "step": 79300 }, { "epoch": 29.88, "learning_rate": 1.402333458788107e-05, "loss": 0.1011, "step": 79400 }, { "epoch": 29.92, "learning_rate": 1.4015807301467823e-05, "loss": 0.1011, "step": 79500 }, { "epoch": 29.96, "learning_rate": 1.4008280015054574e-05, "loss": 0.1011, "step": 79600 }, { "epoch": 30.0, "learning_rate": 1.4000752728641326e-05, "loss": 0.1022, "step": 79700 }, { "epoch": 30.0, "eval_loss": 0.10009202361106873, "eval_runtime": 45.5608, "eval_samples_per_second": 164.615, "eval_steps_per_second": 10.294, "step": 79710 }, { "epoch": 30.03, "learning_rate": 1.3993225442228077e-05, "loss": 0.1027, "step": 79800 }, { "epoch": 30.07, "learning_rate": 1.3985698155814831e-05, "loss": 0.1015, "step": 79900 }, { "epoch": 30.11, "learning_rate": 1.3978170869401582e-05, "loss": 0.1018, "step": 80000 }, { "epoch": 30.15, "learning_rate": 1.3970643582988335e-05, "loss": 0.1014, "step": 80100 }, { "epoch": 30.18, "learning_rate": 1.3963116296575085e-05, "loss": 0.1013, "step": 80200 }, { "epoch": 30.22, "learning_rate": 1.3955589010161838e-05, "loss": 0.0992, "step": 80300 }, { "epoch": 30.26, "learning_rate": 1.3948061723748589e-05, "loss": 0.1029, "step": 80400 }, { "epoch": 30.3, "learning_rate": 1.3940534437335343e-05, "loss": 0.1009, "step": 80500 }, { "epoch": 30.33, "learning_rate": 1.3933007150922094e-05, "loss": 0.1011, "step": 80600 }, { "epoch": 30.37, "learning_rate": 1.3925479864508846e-05, "loss": 0.1003, "step": 80700 }, { "epoch": 30.41, "learning_rate": 1.3917952578095597e-05, "loss": 0.1011, "step": 80800 }, { "epoch": 30.45, "learning_rate": 1.391042529168235e-05, "loss": 0.1003, "step": 80900 }, { "epoch": 30.49, "learning_rate": 1.39028980052691e-05, "loss": 0.1011, "step": 81000 }, { "epoch": 30.52, "learning_rate": 1.3895370718855855e-05, "loss": 0.1004, "step": 81100 }, { "epoch": 30.56, "learning_rate": 1.3887843432442605e-05, "loss": 0.1006, "step": 81200 }, { "epoch": 30.6, "learning_rate": 1.3880316146029358e-05, "loss": 0.1019, "step": 81300 }, { "epoch": 30.64, "learning_rate": 1.3872788859616109e-05, "loss": 0.1013, "step": 81400 }, { "epoch": 30.67, "learning_rate": 1.3865261573202861e-05, "loss": 0.0997, "step": 81500 }, { "epoch": 30.71, "learning_rate": 1.3857734286789612e-05, "loss": 0.1, "step": 81600 }, { "epoch": 30.75, "learning_rate": 1.3850207000376366e-05, "loss": 0.1004, "step": 81700 }, { "epoch": 30.79, "learning_rate": 1.3842679713963117e-05, "loss": 0.101, "step": 81800 }, { "epoch": 30.82, "learning_rate": 1.383515242754987e-05, "loss": 0.1003, "step": 81900 }, { "epoch": 30.86, "learning_rate": 1.3827625141136622e-05, "loss": 0.1, "step": 82000 }, { "epoch": 30.9, "learning_rate": 1.3820097854723373e-05, "loss": 0.1003, "step": 82100 }, { "epoch": 30.94, "learning_rate": 1.3812570568310126e-05, "loss": 0.0997, "step": 82200 }, { "epoch": 30.97, "learning_rate": 1.3805043281896876e-05, "loss": 0.0997, "step": 82300 }, { "epoch": 31.0, "eval_loss": 0.10071013867855072, "eval_runtime": 45.6947, "eval_samples_per_second": 164.133, "eval_steps_per_second": 10.264, "step": 82367 }, { "epoch": 31.01, "learning_rate": 1.379751599548363e-05, "loss": 0.0994, "step": 82400 }, { "epoch": 31.05, "learning_rate": 1.3789988709070381e-05, "loss": 0.1028, "step": 82500 }, { "epoch": 31.09, "learning_rate": 1.3782461422657134e-05, "loss": 0.1002, "step": 82600 }, { "epoch": 31.13, "learning_rate": 1.3774934136243885e-05, "loss": 0.0998, "step": 82700 }, { "epoch": 31.16, "learning_rate": 1.3767406849830637e-05, "loss": 0.1013, "step": 82800 }, { "epoch": 31.2, "learning_rate": 1.3759879563417388e-05, "loss": 0.1021, "step": 82900 }, { "epoch": 31.24, "learning_rate": 1.3752352277004142e-05, "loss": 0.1008, "step": 83000 }, { "epoch": 31.28, "learning_rate": 1.3744824990590893e-05, "loss": 0.101, "step": 83100 }, { "epoch": 31.31, "learning_rate": 1.3737297704177646e-05, "loss": 0.1016, "step": 83200 }, { "epoch": 31.35, "learning_rate": 1.3729770417764396e-05, "loss": 0.0993, "step": 83300 }, { "epoch": 31.39, "learning_rate": 1.3722243131351149e-05, "loss": 0.0993, "step": 83400 }, { "epoch": 31.43, "learning_rate": 1.37147158449379e-05, "loss": 0.0998, "step": 83500 }, { "epoch": 31.46, "learning_rate": 1.3707188558524654e-05, "loss": 0.1008, "step": 83600 }, { "epoch": 31.5, "learning_rate": 1.3699661272111405e-05, "loss": 0.0989, "step": 83700 }, { "epoch": 31.54, "learning_rate": 1.3692133985698157e-05, "loss": 0.1027, "step": 83800 }, { "epoch": 31.58, "learning_rate": 1.3684606699284908e-05, "loss": 0.1001, "step": 83900 }, { "epoch": 31.61, "learning_rate": 1.367707941287166e-05, "loss": 0.1006, "step": 84000 }, { "epoch": 31.65, "learning_rate": 1.3669552126458411e-05, "loss": 0.0991, "step": 84100 }, { "epoch": 31.69, "learning_rate": 1.3662024840045166e-05, "loss": 0.1005, "step": 84200 }, { "epoch": 31.73, "learning_rate": 1.3654497553631916e-05, "loss": 0.099, "step": 84300 }, { "epoch": 31.77, "learning_rate": 1.3646970267218669e-05, "loss": 0.1002, "step": 84400 }, { "epoch": 31.8, "learning_rate": 1.3639442980805421e-05, "loss": 0.1001, "step": 84500 }, { "epoch": 31.84, "learning_rate": 1.3631915694392172e-05, "loss": 0.0988, "step": 84600 }, { "epoch": 31.88, "learning_rate": 1.3624388407978926e-05, "loss": 0.0998, "step": 84700 }, { "epoch": 31.92, "learning_rate": 1.3616861121565677e-05, "loss": 0.0996, "step": 84800 }, { "epoch": 31.95, "learning_rate": 1.360933383515243e-05, "loss": 0.1022, "step": 84900 }, { "epoch": 31.99, "learning_rate": 1.360180654873918e-05, "loss": 0.0998, "step": 85000 }, { "epoch": 32.0, "eval_loss": 0.10160314291715622, "eval_runtime": 45.6356, "eval_samples_per_second": 164.345, "eval_steps_per_second": 10.277, "step": 85024 }, { "epoch": 32.03, "learning_rate": 1.3594279262325933e-05, "loss": 0.1026, "step": 85100 }, { "epoch": 32.07, "learning_rate": 1.3586751975912684e-05, "loss": 0.0999, "step": 85200 }, { "epoch": 32.1, "learning_rate": 1.3579224689499436e-05, "loss": 0.101, "step": 85300 }, { "epoch": 32.14, "learning_rate": 1.3571697403086187e-05, "loss": 0.1007, "step": 85400 }, { "epoch": 32.18, "learning_rate": 1.3564170116672942e-05, "loss": 0.1001, "step": 85500 }, { "epoch": 32.22, "learning_rate": 1.3556642830259692e-05, "loss": 0.1014, "step": 85600 }, { "epoch": 32.25, "learning_rate": 1.3549115543846445e-05, "loss": 0.0993, "step": 85700 }, { "epoch": 32.29, "learning_rate": 1.3541588257433196e-05, "loss": 0.1004, "step": 85800 }, { "epoch": 32.33, "learning_rate": 1.3534060971019948e-05, "loss": 0.1015, "step": 85900 }, { "epoch": 32.37, "learning_rate": 1.3526533684606699e-05, "loss": 0.0991, "step": 86000 }, { "epoch": 32.4, "learning_rate": 1.3519006398193453e-05, "loss": 0.0984, "step": 86100 }, { "epoch": 32.44, "learning_rate": 1.3511479111780204e-05, "loss": 0.1011, "step": 86200 }, { "epoch": 32.48, "learning_rate": 1.3503951825366957e-05, "loss": 0.1, "step": 86300 }, { "epoch": 32.52, "learning_rate": 1.3496424538953707e-05, "loss": 0.1009, "step": 86400 }, { "epoch": 32.56, "learning_rate": 1.348889725254046e-05, "loss": 0.0996, "step": 86500 }, { "epoch": 32.59, "learning_rate": 1.348136996612721e-05, "loss": 0.1003, "step": 86600 }, { "epoch": 32.63, "learning_rate": 1.3473842679713965e-05, "loss": 0.0997, "step": 86700 }, { "epoch": 32.67, "learning_rate": 1.3466315393300716e-05, "loss": 0.1016, "step": 86800 }, { "epoch": 32.71, "learning_rate": 1.3458788106887468e-05, "loss": 0.1003, "step": 86900 }, { "epoch": 32.74, "learning_rate": 1.345126082047422e-05, "loss": 0.0984, "step": 87000 }, { "epoch": 32.78, "learning_rate": 1.3443733534060972e-05, "loss": 0.1008, "step": 87100 }, { "epoch": 32.82, "learning_rate": 1.3436206247647726e-05, "loss": 0.0999, "step": 87200 }, { "epoch": 32.86, "learning_rate": 1.3428678961234477e-05, "loss": 0.0996, "step": 87300 }, { "epoch": 32.89, "learning_rate": 1.3421151674821229e-05, "loss": 0.101, "step": 87400 }, { "epoch": 32.93, "learning_rate": 1.341362438840798e-05, "loss": 0.099, "step": 87500 }, { "epoch": 32.97, "learning_rate": 1.3406097101994732e-05, "loss": 0.1019, "step": 87600 }, { "epoch": 33.0, "eval_loss": 0.10076244920492172, "eval_runtime": 45.2424, "eval_samples_per_second": 165.774, "eval_steps_per_second": 10.366, "step": 87681 }, { "epoch": 33.01, "learning_rate": 1.3398569815581483e-05, "loss": 0.1001, "step": 87700 }, { "epoch": 33.04, "learning_rate": 1.3391042529168237e-05, "loss": 0.1017, "step": 87800 }, { "epoch": 33.08, "learning_rate": 1.3383515242754988e-05, "loss": 0.1, "step": 87900 }, { "epoch": 33.12, "learning_rate": 1.337598795634174e-05, "loss": 0.0983, "step": 88000 }, { "epoch": 33.16, "learning_rate": 1.3368460669928492e-05, "loss": 0.1, "step": 88100 }, { "epoch": 33.2, "learning_rate": 1.3360933383515244e-05, "loss": 0.1008, "step": 88200 }, { "epoch": 33.23, "learning_rate": 1.3353406097101995e-05, "loss": 0.0998, "step": 88300 }, { "epoch": 33.27, "learning_rate": 1.3345878810688747e-05, "loss": 0.1, "step": 88400 }, { "epoch": 33.31, "learning_rate": 1.3338351524275498e-05, "loss": 0.0997, "step": 88500 }, { "epoch": 33.35, "learning_rate": 1.3330824237862252e-05, "loss": 0.0995, "step": 88600 }, { "epoch": 33.38, "learning_rate": 1.3323296951449003e-05, "loss": 0.1015, "step": 88700 }, { "epoch": 33.42, "learning_rate": 1.3315769665035756e-05, "loss": 0.0993, "step": 88800 }, { "epoch": 33.46, "learning_rate": 1.3308242378622507e-05, "loss": 0.0986, "step": 88900 }, { "epoch": 33.5, "learning_rate": 1.330071509220926e-05, "loss": 0.1003, "step": 89000 }, { "epoch": 33.53, "learning_rate": 1.329318780579601e-05, "loss": 0.0997, "step": 89100 }, { "epoch": 33.57, "learning_rate": 1.3285660519382764e-05, "loss": 0.0993, "step": 89200 }, { "epoch": 33.61, "learning_rate": 1.3278133232969515e-05, "loss": 0.1017, "step": 89300 }, { "epoch": 33.65, "learning_rate": 1.3270605946556268e-05, "loss": 0.1003, "step": 89400 }, { "epoch": 33.68, "learning_rate": 1.326307866014302e-05, "loss": 0.1012, "step": 89500 }, { "epoch": 33.72, "learning_rate": 1.3255551373729771e-05, "loss": 0.1006, "step": 89600 }, { "epoch": 33.76, "learning_rate": 1.3248024087316525e-05, "loss": 0.0976, "step": 89700 }, { "epoch": 33.8, "learning_rate": 1.3240496800903276e-05, "loss": 0.1002, "step": 89800 }, { "epoch": 33.84, "learning_rate": 1.3232969514490028e-05, "loss": 0.0984, "step": 89900 }, { "epoch": 33.87, "learning_rate": 1.322544222807678e-05, "loss": 0.0987, "step": 90000 }, { "epoch": 33.91, "learning_rate": 1.3217914941663532e-05, "loss": 0.1008, "step": 90100 }, { "epoch": 33.95, "learning_rate": 1.3210387655250283e-05, "loss": 0.1031, "step": 90200 }, { "epoch": 33.99, "learning_rate": 1.3202860368837037e-05, "loss": 0.0999, "step": 90300 }, { "epoch": 34.0, "eval_loss": 0.10001099109649658, "eval_runtime": 45.4099, "eval_samples_per_second": 165.162, "eval_steps_per_second": 10.328, "step": 90338 }, { "epoch": 34.02, "learning_rate": 1.3195333082423788e-05, "loss": 0.0995, "step": 90400 }, { "epoch": 34.06, "learning_rate": 1.318780579601054e-05, "loss": 0.1017, "step": 90500 }, { "epoch": 34.1, "learning_rate": 1.3180278509597291e-05, "loss": 0.101, "step": 90600 }, { "epoch": 34.14, "learning_rate": 1.3172751223184043e-05, "loss": 0.1002, "step": 90700 }, { "epoch": 34.17, "learning_rate": 1.3165223936770794e-05, "loss": 0.0988, "step": 90800 }, { "epoch": 34.21, "learning_rate": 1.3157696650357548e-05, "loss": 0.101, "step": 90900 }, { "epoch": 34.25, "learning_rate": 1.31501693639443e-05, "loss": 0.0999, "step": 91000 }, { "epoch": 34.29, "learning_rate": 1.3142642077531052e-05, "loss": 0.0994, "step": 91100 }, { "epoch": 34.32, "learning_rate": 1.3135114791117803e-05, "loss": 0.1002, "step": 91200 }, { "epoch": 34.36, "learning_rate": 1.3127587504704555e-05, "loss": 0.0987, "step": 91300 }, { "epoch": 34.4, "learning_rate": 1.3120060218291306e-05, "loss": 0.1001, "step": 91400 }, { "epoch": 34.44, "learning_rate": 1.3112532931878058e-05, "loss": 0.099, "step": 91500 }, { "epoch": 34.47, "learning_rate": 1.310500564546481e-05, "loss": 0.0987, "step": 91600 }, { "epoch": 34.51, "learning_rate": 1.3097478359051563e-05, "loss": 0.0997, "step": 91700 }, { "epoch": 34.55, "learning_rate": 1.3089951072638314e-05, "loss": 0.1008, "step": 91800 }, { "epoch": 34.59, "learning_rate": 1.3082423786225067e-05, "loss": 0.0991, "step": 91900 }, { "epoch": 34.63, "learning_rate": 1.307489649981182e-05, "loss": 0.0995, "step": 92000 }, { "epoch": 34.66, "learning_rate": 1.306736921339857e-05, "loss": 0.1001, "step": 92100 }, { "epoch": 34.7, "learning_rate": 1.3059841926985324e-05, "loss": 0.0991, "step": 92200 }, { "epoch": 34.74, "learning_rate": 1.3052314640572075e-05, "loss": 0.0977, "step": 92300 }, { "epoch": 34.78, "learning_rate": 1.3044787354158828e-05, "loss": 0.0993, "step": 92400 }, { "epoch": 34.81, "learning_rate": 1.3037260067745578e-05, "loss": 0.1001, "step": 92500 }, { "epoch": 34.85, "learning_rate": 1.3029732781332331e-05, "loss": 0.1, "step": 92600 }, { "epoch": 34.89, "learning_rate": 1.3022205494919082e-05, "loss": 0.0998, "step": 92700 }, { "epoch": 34.93, "learning_rate": 1.3014678208505836e-05, "loss": 0.0996, "step": 92800 }, { "epoch": 34.96, "learning_rate": 1.3007150922092587e-05, "loss": 0.0998, "step": 92900 }, { "epoch": 35.0, "eval_loss": 0.09930834919214249, "eval_runtime": 45.6646, "eval_samples_per_second": 164.241, "eval_steps_per_second": 10.271, "step": 92995 }, { "epoch": 35.0, "learning_rate": 1.299962363567934e-05, "loss": 0.1003, "step": 93000 }, { "epoch": 35.04, "learning_rate": 1.299209634926609e-05, "loss": 0.0996, "step": 93100 }, { "epoch": 35.08, "learning_rate": 1.2984569062852843e-05, "loss": 0.0986, "step": 93200 }, { "epoch": 35.11, "learning_rate": 1.2977041776439594e-05, "loss": 0.0999, "step": 93300 }, { "epoch": 35.15, "learning_rate": 1.2969514490026348e-05, "loss": 0.1006, "step": 93400 }, { "epoch": 35.19, "learning_rate": 1.2961987203613099e-05, "loss": 0.0999, "step": 93500 }, { "epoch": 35.23, "learning_rate": 1.2954459917199851e-05, "loss": 0.0984, "step": 93600 }, { "epoch": 35.27, "learning_rate": 1.2946932630786602e-05, "loss": 0.0981, "step": 93700 }, { "epoch": 35.3, "learning_rate": 1.2939405344373354e-05, "loss": 0.1004, "step": 93800 }, { "epoch": 35.34, "learning_rate": 1.2931878057960105e-05, "loss": 0.0994, "step": 93900 }, { "epoch": 35.38, "learning_rate": 1.292435077154686e-05, "loss": 0.0984, "step": 94000 }, { "epoch": 35.42, "learning_rate": 1.291682348513361e-05, "loss": 0.1002, "step": 94100 }, { "epoch": 35.45, "learning_rate": 1.2909296198720363e-05, "loss": 0.0997, "step": 94200 }, { "epoch": 35.49, "learning_rate": 1.2901768912307114e-05, "loss": 0.0977, "step": 94300 }, { "epoch": 35.53, "learning_rate": 1.2894241625893866e-05, "loss": 0.0991, "step": 94400 }, { "epoch": 35.57, "learning_rate": 1.2886714339480619e-05, "loss": 0.0981, "step": 94500 }, { "epoch": 35.6, "learning_rate": 1.287918705306737e-05, "loss": 0.0998, "step": 94600 }, { "epoch": 35.64, "learning_rate": 1.2871659766654124e-05, "loss": 0.0999, "step": 94700 }, { "epoch": 35.68, "learning_rate": 1.2864132480240874e-05, "loss": 0.0989, "step": 94800 }, { "epoch": 35.72, "learning_rate": 1.2856605193827627e-05, "loss": 0.1, "step": 94900 }, { "epoch": 35.75, "learning_rate": 1.2849077907414378e-05, "loss": 0.1003, "step": 95000 }, { "epoch": 35.79, "learning_rate": 1.284155062100113e-05, "loss": 0.0997, "step": 95100 }, { "epoch": 35.83, "learning_rate": 1.2834023334587881e-05, "loss": 0.1002, "step": 95200 }, { "epoch": 35.87, "learning_rate": 1.2826496048174635e-05, "loss": 0.0986, "step": 95300 }, { "epoch": 35.91, "learning_rate": 1.2818968761761386e-05, "loss": 0.0999, "step": 95400 }, { "epoch": 35.94, "learning_rate": 1.2811441475348139e-05, "loss": 0.1005, "step": 95500 }, { "epoch": 35.98, "learning_rate": 1.280391418893489e-05, "loss": 0.0994, "step": 95600 }, { "epoch": 36.0, "eval_loss": 0.09918170422315598, "eval_runtime": 45.6422, "eval_samples_per_second": 164.321, "eval_steps_per_second": 10.276, "step": 95652 }, { "epoch": 36.02, "learning_rate": 1.2796386902521642e-05, "loss": 0.0979, "step": 95700 }, { "epoch": 36.06, "learning_rate": 1.2788859616108393e-05, "loss": 0.0981, "step": 95800 }, { "epoch": 36.09, "learning_rate": 1.2781332329695147e-05, "loss": 0.0992, "step": 95900 }, { "epoch": 36.13, "learning_rate": 1.2773805043281898e-05, "loss": 0.0991, "step": 96000 }, { "epoch": 36.17, "learning_rate": 1.276627775686865e-05, "loss": 0.1003, "step": 96100 }, { "epoch": 36.21, "learning_rate": 1.2758750470455401e-05, "loss": 0.0991, "step": 96200 }, { "epoch": 36.24, "learning_rate": 1.2751223184042154e-05, "loss": 0.0985, "step": 96300 }, { "epoch": 36.28, "learning_rate": 1.2743695897628904e-05, "loss": 0.0993, "step": 96400 }, { "epoch": 36.32, "learning_rate": 1.2736168611215659e-05, "loss": 0.0998, "step": 96500 }, { "epoch": 36.36, "learning_rate": 1.272864132480241e-05, "loss": 0.1014, "step": 96600 }, { "epoch": 36.39, "learning_rate": 1.2721114038389162e-05, "loss": 0.1005, "step": 96700 }, { "epoch": 36.43, "learning_rate": 1.2713586751975913e-05, "loss": 0.0992, "step": 96800 }, { "epoch": 36.47, "learning_rate": 1.2706059465562665e-05, "loss": 0.0981, "step": 96900 }, { "epoch": 36.51, "learning_rate": 1.2698532179149418e-05, "loss": 0.0988, "step": 97000 }, { "epoch": 36.54, "learning_rate": 1.2691004892736169e-05, "loss": 0.0978, "step": 97100 }, { "epoch": 36.58, "learning_rate": 1.2683477606322923e-05, "loss": 0.0989, "step": 97200 }, { "epoch": 36.62, "learning_rate": 1.2675950319909674e-05, "loss": 0.098, "step": 97300 }, { "epoch": 36.66, "learning_rate": 1.2668423033496426e-05, "loss": 0.0979, "step": 97400 }, { "epoch": 36.7, "learning_rate": 1.2660895747083177e-05, "loss": 0.0989, "step": 97500 }, { "epoch": 36.73, "learning_rate": 1.265336846066993e-05, "loss": 0.0993, "step": 97600 }, { "epoch": 36.77, "learning_rate": 1.264584117425668e-05, "loss": 0.0988, "step": 97700 }, { "epoch": 36.81, "learning_rate": 1.2638313887843435e-05, "loss": 0.0979, "step": 97800 }, { "epoch": 36.85, "learning_rate": 1.2630786601430185e-05, "loss": 0.0982, "step": 97900 }, { "epoch": 36.88, "learning_rate": 1.2623259315016938e-05, "loss": 0.0992, "step": 98000 }, { "epoch": 36.92, "learning_rate": 1.2615732028603689e-05, "loss": 0.1, "step": 98100 }, { "epoch": 36.96, "learning_rate": 1.2608204742190441e-05, "loss": 0.0977, "step": 98200 }, { "epoch": 37.0, "learning_rate": 1.2600677455777192e-05, "loss": 0.0966, "step": 98300 }, { "epoch": 37.0, "eval_loss": 0.09910181164741516, "eval_runtime": 45.3338, "eval_samples_per_second": 165.439, "eval_steps_per_second": 10.345, "step": 98309 }, { "epoch": 37.03, "learning_rate": 1.2593150169363946e-05, "loss": 0.0961, "step": 98400 }, { "epoch": 37.07, "learning_rate": 1.2585622882950697e-05, "loss": 0.0995, "step": 98500 }, { "epoch": 37.11, "learning_rate": 1.257809559653745e-05, "loss": 0.0996, "step": 98600 }, { "epoch": 37.15, "learning_rate": 1.25705683101242e-05, "loss": 0.0982, "step": 98700 }, { "epoch": 37.18, "learning_rate": 1.2563041023710953e-05, "loss": 0.0985, "step": 98800 }, { "epoch": 37.22, "learning_rate": 1.2555513737297704e-05, "loss": 0.0968, "step": 98900 }, { "epoch": 37.26, "learning_rate": 1.2547986450884458e-05, "loss": 0.0982, "step": 99000 }, { "epoch": 37.3, "learning_rate": 1.2540459164471209e-05, "loss": 0.0994, "step": 99100 }, { "epoch": 37.34, "learning_rate": 1.2532931878057961e-05, "loss": 0.1004, "step": 99200 }, { "epoch": 37.37, "learning_rate": 1.2525404591644712e-05, "loss": 0.0988, "step": 99300 }, { "epoch": 37.41, "learning_rate": 1.2517877305231465e-05, "loss": 0.099, "step": 99400 }, { "epoch": 37.45, "learning_rate": 1.2510350018818219e-05, "loss": 0.0991, "step": 99500 }, { "epoch": 37.49, "learning_rate": 1.250282273240497e-05, "loss": 0.0978, "step": 99600 }, { "epoch": 37.52, "learning_rate": 1.2495295445991722e-05, "loss": 0.0981, "step": 99700 }, { "epoch": 37.56, "learning_rate": 1.2487768159578473e-05, "loss": 0.0989, "step": 99800 }, { "epoch": 37.6, "learning_rate": 1.2480240873165226e-05, "loss": 0.0982, "step": 99900 }, { "epoch": 37.64, "learning_rate": 1.2472713586751976e-05, "loss": 0.1001, "step": 100000 }, { "epoch": 37.67, "learning_rate": 1.2465186300338729e-05, "loss": 0.0972, "step": 100100 }, { "epoch": 37.71, "learning_rate": 1.245765901392548e-05, "loss": 0.0993, "step": 100200 }, { "epoch": 37.75, "learning_rate": 1.2450131727512234e-05, "loss": 0.0997, "step": 100300 }, { "epoch": 37.79, "learning_rate": 1.2442604441098985e-05, "loss": 0.0978, "step": 100400 }, { "epoch": 37.82, "learning_rate": 1.2435077154685737e-05, "loss": 0.0992, "step": 100500 }, { "epoch": 37.86, "learning_rate": 1.2427549868272488e-05, "loss": 0.0992, "step": 100600 }, { "epoch": 37.9, "learning_rate": 1.242002258185924e-05, "loss": 0.0984, "step": 100700 }, { "epoch": 37.94, "learning_rate": 1.2412495295445991e-05, "loss": 0.0983, "step": 100800 }, { "epoch": 37.98, "learning_rate": 1.2404968009032746e-05, "loss": 0.0997, "step": 100900 }, { "epoch": 38.0, "eval_loss": 0.09699103981256485, "eval_runtime": 45.3352, "eval_samples_per_second": 165.435, "eval_steps_per_second": 10.345, "step": 100966 }, { "epoch": 38.01, "learning_rate": 1.2397440722619496e-05, "loss": 0.0983, "step": 101000 }, { "epoch": 38.05, "learning_rate": 1.2389913436206249e-05, "loss": 0.0984, "step": 101100 }, { "epoch": 38.09, "learning_rate": 1.2382386149793e-05, "loss": 0.0971, "step": 101200 }, { "epoch": 38.13, "learning_rate": 1.2374858863379752e-05, "loss": 0.0979, "step": 101300 }, { "epoch": 38.16, "learning_rate": 1.2367331576966503e-05, "loss": 0.0992, "step": 101400 }, { "epoch": 38.2, "learning_rate": 1.2359804290553257e-05, "loss": 0.0989, "step": 101500 }, { "epoch": 38.24, "learning_rate": 1.2352277004140008e-05, "loss": 0.0988, "step": 101600 }, { "epoch": 38.28, "learning_rate": 1.234474971772676e-05, "loss": 0.098, "step": 101700 }, { "epoch": 38.31, "learning_rate": 1.2337222431313511e-05, "loss": 0.0961, "step": 101800 }, { "epoch": 38.35, "learning_rate": 1.2329695144900264e-05, "loss": 0.0978, "step": 101900 }, { "epoch": 38.39, "learning_rate": 1.2322167858487018e-05, "loss": 0.1003, "step": 102000 }, { "epoch": 38.43, "learning_rate": 1.2314640572073769e-05, "loss": 0.0989, "step": 102100 }, { "epoch": 38.46, "learning_rate": 1.2307113285660521e-05, "loss": 0.0984, "step": 102200 }, { "epoch": 38.5, "learning_rate": 1.2299585999247272e-05, "loss": 0.0985, "step": 102300 }, { "epoch": 38.54, "learning_rate": 1.2292058712834025e-05, "loss": 0.1008, "step": 102400 }, { "epoch": 38.58, "learning_rate": 1.2284531426420776e-05, "loss": 0.0969, "step": 102500 }, { "epoch": 38.61, "learning_rate": 1.227700414000753e-05, "loss": 0.0981, "step": 102600 }, { "epoch": 38.65, "learning_rate": 1.226947685359428e-05, "loss": 0.0979, "step": 102700 }, { "epoch": 38.69, "learning_rate": 1.2261949567181033e-05, "loss": 0.0994, "step": 102800 }, { "epoch": 38.73, "learning_rate": 1.2254422280767784e-05, "loss": 0.0992, "step": 102900 }, { "epoch": 38.77, "learning_rate": 1.2246894994354536e-05, "loss": 0.0981, "step": 103000 }, { "epoch": 38.8, "learning_rate": 1.2239367707941287e-05, "loss": 0.0991, "step": 103100 }, { "epoch": 38.84, "learning_rate": 1.223184042152804e-05, "loss": 0.0977, "step": 103200 }, { "epoch": 38.88, "learning_rate": 1.222431313511479e-05, "loss": 0.0979, "step": 103300 }, { "epoch": 38.92, "learning_rate": 1.2216785848701545e-05, "loss": 0.0976, "step": 103400 }, { "epoch": 38.95, "learning_rate": 1.2209258562288296e-05, "loss": 0.0996, "step": 103500 }, { "epoch": 38.99, "learning_rate": 1.2201731275875048e-05, "loss": 0.0991, "step": 103600 }, { "epoch": 39.0, "eval_loss": 0.09791671484708786, "eval_runtime": 45.4451, "eval_samples_per_second": 165.034, "eval_steps_per_second": 10.32, "step": 103623 }, { "epoch": 39.03, "learning_rate": 1.2194203989461799e-05, "loss": 0.098, "step": 103700 }, { "epoch": 39.07, "learning_rate": 1.2186676703048552e-05, "loss": 0.0965, "step": 103800 }, { "epoch": 39.1, "learning_rate": 1.2179149416635302e-05, "loss": 0.0979, "step": 103900 }, { "epoch": 39.14, "learning_rate": 1.2171622130222057e-05, "loss": 0.0978, "step": 104000 }, { "epoch": 39.18, "learning_rate": 1.2164094843808807e-05, "loss": 0.0996, "step": 104100 }, { "epoch": 39.22, "learning_rate": 1.215656755739556e-05, "loss": 0.0995, "step": 104200 }, { "epoch": 39.25, "learning_rate": 1.2149040270982312e-05, "loss": 0.0988, "step": 104300 }, { "epoch": 39.29, "learning_rate": 1.2141512984569063e-05, "loss": 0.0975, "step": 104400 }, { "epoch": 39.33, "learning_rate": 1.2133985698155817e-05, "loss": 0.098, "step": 104500 }, { "epoch": 39.37, "learning_rate": 1.2126458411742568e-05, "loss": 0.098, "step": 104600 }, { "epoch": 39.41, "learning_rate": 1.211893112532932e-05, "loss": 0.098, "step": 104700 }, { "epoch": 39.44, "learning_rate": 1.2111403838916072e-05, "loss": 0.0995, "step": 104800 }, { "epoch": 39.48, "learning_rate": 1.2103876552502824e-05, "loss": 0.0977, "step": 104900 }, { "epoch": 39.52, "learning_rate": 1.2096349266089575e-05, "loss": 0.0988, "step": 105000 }, { "epoch": 39.56, "learning_rate": 1.2088821979676329e-05, "loss": 0.0986, "step": 105100 }, { "epoch": 39.59, "learning_rate": 1.208129469326308e-05, "loss": 0.0987, "step": 105200 }, { "epoch": 39.63, "learning_rate": 1.2073767406849832e-05, "loss": 0.0979, "step": 105300 }, { "epoch": 39.67, "learning_rate": 1.2066240120436583e-05, "loss": 0.0963, "step": 105400 }, { "epoch": 39.71, "learning_rate": 1.2058712834023336e-05, "loss": 0.0978, "step": 105500 }, { "epoch": 39.74, "learning_rate": 1.2051185547610087e-05, "loss": 0.0989, "step": 105600 }, { "epoch": 39.78, "learning_rate": 1.204365826119684e-05, "loss": 0.0971, "step": 105700 }, { "epoch": 39.82, "learning_rate": 1.2036130974783592e-05, "loss": 0.0985, "step": 105800 }, { "epoch": 39.86, "learning_rate": 1.2028603688370344e-05, "loss": 0.0964, "step": 105900 }, { "epoch": 39.89, "learning_rate": 1.2021076401957095e-05, "loss": 0.0968, "step": 106000 }, { "epoch": 39.93, "learning_rate": 1.2013549115543847e-05, "loss": 0.0965, "step": 106100 }, { "epoch": 39.97, "learning_rate": 1.2006021829130598e-05, "loss": 0.099, "step": 106200 }, { "epoch": 40.0, "eval_loss": 0.09832270443439484, "eval_runtime": 45.1549, "eval_samples_per_second": 166.095, "eval_steps_per_second": 10.386, "step": 106280 }, { "epoch": 40.01, "learning_rate": 1.199849454271735e-05, "loss": 0.098, "step": 106300 }, { "epoch": 40.05, "learning_rate": 1.1990967256304102e-05, "loss": 0.098, "step": 106400 }, { "epoch": 40.08, "learning_rate": 1.1983439969890856e-05, "loss": 0.0977, "step": 106500 }, { "epoch": 40.12, "learning_rate": 1.1975912683477607e-05, "loss": 0.0994, "step": 106600 }, { "epoch": 40.16, "learning_rate": 1.196838539706436e-05, "loss": 0.0976, "step": 106700 }, { "epoch": 40.2, "learning_rate": 1.1960858110651112e-05, "loss": 0.098, "step": 106800 }, { "epoch": 40.23, "learning_rate": 1.1953330824237862e-05, "loss": 0.0978, "step": 106900 }, { "epoch": 40.27, "learning_rate": 1.1945803537824617e-05, "loss": 0.0981, "step": 107000 }, { "epoch": 40.31, "learning_rate": 1.1938276251411368e-05, "loss": 0.0971, "step": 107100 }, { "epoch": 40.35, "learning_rate": 1.193074896499812e-05, "loss": 0.0985, "step": 107200 }, { "epoch": 40.38, "learning_rate": 1.1923221678584871e-05, "loss": 0.0996, "step": 107300 }, { "epoch": 40.42, "learning_rate": 1.1915694392171623e-05, "loss": 0.0981, "step": 107400 }, { "epoch": 40.46, "learning_rate": 1.1908167105758374e-05, "loss": 0.0996, "step": 107500 }, { "epoch": 40.5, "learning_rate": 1.1900639819345128e-05, "loss": 0.0989, "step": 107600 }, { "epoch": 40.53, "learning_rate": 1.189311253293188e-05, "loss": 0.0992, "step": 107700 }, { "epoch": 40.57, "learning_rate": 1.1885585246518632e-05, "loss": 0.0974, "step": 107800 }, { "epoch": 40.61, "learning_rate": 1.1878057960105383e-05, "loss": 0.0984, "step": 107900 }, { "epoch": 40.65, "learning_rate": 1.1870530673692135e-05, "loss": 0.0984, "step": 108000 }, { "epoch": 40.68, "learning_rate": 1.1863003387278886e-05, "loss": 0.099, "step": 108100 }, { "epoch": 40.72, "learning_rate": 1.185547610086564e-05, "loss": 0.0984, "step": 108200 }, { "epoch": 40.76, "learning_rate": 1.1847948814452391e-05, "loss": 0.0966, "step": 108300 }, { "epoch": 40.8, "learning_rate": 1.1840421528039143e-05, "loss": 0.1, "step": 108400 }, { "epoch": 40.84, "learning_rate": 1.1832894241625894e-05, "loss": 0.0979, "step": 108500 }, { "epoch": 40.87, "learning_rate": 1.1825366955212647e-05, "loss": 0.0981, "step": 108600 }, { "epoch": 40.91, "learning_rate": 1.1817839668799398e-05, "loss": 0.0973, "step": 108700 }, { "epoch": 40.95, "learning_rate": 1.1810312382386152e-05, "loss": 0.0978, "step": 108800 }, { "epoch": 40.99, "learning_rate": 1.1802785095972903e-05, "loss": 0.0974, "step": 108900 }, { "epoch": 41.0, "eval_loss": 0.09795571118593216, "eval_runtime": 45.3802, "eval_samples_per_second": 165.27, "eval_steps_per_second": 10.335, "step": 108937 }, { "epoch": 41.02, "learning_rate": 1.1795257809559655e-05, "loss": 0.0961, "step": 109000 }, { "epoch": 41.06, "learning_rate": 1.1787730523146406e-05, "loss": 0.0989, "step": 109100 }, { "epoch": 41.1, "learning_rate": 1.1780203236733158e-05, "loss": 0.0988, "step": 109200 }, { "epoch": 41.14, "learning_rate": 1.1772675950319911e-05, "loss": 0.0979, "step": 109300 }, { "epoch": 41.17, "learning_rate": 1.1765148663906662e-05, "loss": 0.0971, "step": 109400 }, { "epoch": 41.21, "learning_rate": 1.1757621377493416e-05, "loss": 0.0965, "step": 109500 }, { "epoch": 41.25, "learning_rate": 1.1750094091080167e-05, "loss": 0.0982, "step": 109600 }, { "epoch": 41.29, "learning_rate": 1.174256680466692e-05, "loss": 0.0974, "step": 109700 }, { "epoch": 41.32, "learning_rate": 1.173503951825367e-05, "loss": 0.097, "step": 109800 }, { "epoch": 41.36, "learning_rate": 1.1727512231840423e-05, "loss": 0.0974, "step": 109900 }, { "epoch": 41.4, "learning_rate": 1.1719984945427173e-05, "loss": 0.0969, "step": 110000 }, { "epoch": 41.44, "learning_rate": 1.1712457659013928e-05, "loss": 0.0983, "step": 110100 }, { "epoch": 41.48, "learning_rate": 1.1704930372600679e-05, "loss": 0.0978, "step": 110200 }, { "epoch": 41.51, "learning_rate": 1.1697403086187431e-05, "loss": 0.0962, "step": 110300 }, { "epoch": 41.55, "learning_rate": 1.1689875799774182e-05, "loss": 0.0985, "step": 110400 }, { "epoch": 41.59, "learning_rate": 1.1682348513360934e-05, "loss": 0.0992, "step": 110500 }, { "epoch": 41.63, "learning_rate": 1.1674821226947685e-05, "loss": 0.0972, "step": 110600 }, { "epoch": 41.66, "learning_rate": 1.166729394053444e-05, "loss": 0.098, "step": 110700 }, { "epoch": 41.7, "learning_rate": 1.165976665412119e-05, "loss": 0.0991, "step": 110800 }, { "epoch": 41.74, "learning_rate": 1.1652239367707943e-05, "loss": 0.0963, "step": 110900 }, { "epoch": 41.78, "learning_rate": 1.1644712081294694e-05, "loss": 0.0983, "step": 111000 }, { "epoch": 41.81, "learning_rate": 1.1637184794881446e-05, "loss": 0.0988, "step": 111100 }, { "epoch": 41.85, "learning_rate": 1.1629657508468197e-05, "loss": 0.0977, "step": 111200 }, { "epoch": 41.89, "learning_rate": 1.1622130222054951e-05, "loss": 0.0984, "step": 111300 }, { "epoch": 41.93, "learning_rate": 1.1614602935641702e-05, "loss": 0.0974, "step": 111400 }, { "epoch": 41.96, "learning_rate": 1.1607075649228454e-05, "loss": 0.0974, "step": 111500 }, { "epoch": 42.0, "eval_loss": 0.0971272811293602, "eval_runtime": 45.4607, "eval_samples_per_second": 164.978, "eval_steps_per_second": 10.317, "step": 111594 }, { "epoch": 42.0, "learning_rate": 1.1599548362815205e-05, "loss": 0.0983, "step": 111600 }, { "epoch": 42.04, "learning_rate": 1.1592021076401958e-05, "loss": 0.0968, "step": 111700 }, { "epoch": 42.08, "learning_rate": 1.1584493789988712e-05, "loss": 0.0984, "step": 111800 }, { "epoch": 42.12, "learning_rate": 1.1576966503575463e-05, "loss": 0.0991, "step": 111900 }, { "epoch": 42.15, "learning_rate": 1.1569439217162215e-05, "loss": 0.0965, "step": 112000 }, { "epoch": 42.19, "learning_rate": 1.1561911930748966e-05, "loss": 0.098, "step": 112100 }, { "epoch": 42.23, "learning_rate": 1.1554384644335719e-05, "loss": 0.0979, "step": 112200 }, { "epoch": 42.27, "learning_rate": 1.154685735792247e-05, "loss": 0.0971, "step": 112300 }, { "epoch": 42.3, "learning_rate": 1.1539330071509222e-05, "loss": 0.0974, "step": 112400 }, { "epoch": 42.34, "learning_rate": 1.1531802785095973e-05, "loss": 0.0963, "step": 112500 }, { "epoch": 42.38, "learning_rate": 1.1524275498682727e-05, "loss": 0.097, "step": 112600 }, { "epoch": 42.42, "learning_rate": 1.1516748212269478e-05, "loss": 0.1001, "step": 112700 }, { "epoch": 42.45, "learning_rate": 1.150922092585623e-05, "loss": 0.0972, "step": 112800 }, { "epoch": 42.49, "learning_rate": 1.1501693639442981e-05, "loss": 0.0976, "step": 112900 }, { "epoch": 42.53, "learning_rate": 1.1494166353029734e-05, "loss": 0.0963, "step": 113000 }, { "epoch": 42.57, "learning_rate": 1.1486639066616484e-05, "loss": 0.0965, "step": 113100 }, { "epoch": 42.6, "learning_rate": 1.1479111780203239e-05, "loss": 0.0978, "step": 113200 }, { "epoch": 42.64, "learning_rate": 1.147158449378999e-05, "loss": 0.0996, "step": 113300 }, { "epoch": 42.68, "learning_rate": 1.1464057207376742e-05, "loss": 0.0965, "step": 113400 }, { "epoch": 42.72, "learning_rate": 1.1456529920963493e-05, "loss": 0.0964, "step": 113500 }, { "epoch": 42.75, "learning_rate": 1.1449002634550245e-05, "loss": 0.0979, "step": 113600 }, { "epoch": 42.79, "learning_rate": 1.1441475348136996e-05, "loss": 0.0982, "step": 113700 }, { "epoch": 42.83, "learning_rate": 1.143394806172375e-05, "loss": 0.0975, "step": 113800 }, { "epoch": 42.87, "learning_rate": 1.1426420775310501e-05, "loss": 0.0956, "step": 113900 }, { "epoch": 42.91, "learning_rate": 1.1418893488897254e-05, "loss": 0.0964, "step": 114000 }, { "epoch": 42.94, "learning_rate": 1.1411366202484005e-05, "loss": 0.0984, "step": 114100 }, { "epoch": 42.98, "learning_rate": 1.1403838916070757e-05, "loss": 0.0972, "step": 114200 }, { "epoch": 43.0, "eval_loss": 0.09703872352838516, "eval_runtime": 45.2306, "eval_samples_per_second": 165.817, "eval_steps_per_second": 10.369, "step": 114251 }, { "epoch": 43.02, "learning_rate": 1.1396311629657511e-05, "loss": 0.0971, "step": 114300 }, { "epoch": 43.06, "learning_rate": 1.1388784343244262e-05, "loss": 0.0988, "step": 114400 }, { "epoch": 43.09, "learning_rate": 1.1381257056831015e-05, "loss": 0.0966, "step": 114500 }, { "epoch": 43.13, "learning_rate": 1.1373729770417765e-05, "loss": 0.0964, "step": 114600 }, { "epoch": 43.17, "learning_rate": 1.1366202484004518e-05, "loss": 0.0984, "step": 114700 }, { "epoch": 43.21, "learning_rate": 1.1358675197591269e-05, "loss": 0.0978, "step": 114800 }, { "epoch": 43.24, "learning_rate": 1.1351147911178021e-05, "loss": 0.0975, "step": 114900 }, { "epoch": 43.28, "learning_rate": 1.1343620624764772e-05, "loss": 0.0976, "step": 115000 }, { "epoch": 43.32, "learning_rate": 1.1336093338351526e-05, "loss": 0.0965, "step": 115100 }, { "epoch": 43.36, "learning_rate": 1.1328566051938277e-05, "loss": 0.0961, "step": 115200 }, { "epoch": 43.39, "learning_rate": 1.132103876552503e-05, "loss": 0.0964, "step": 115300 }, { "epoch": 43.43, "learning_rate": 1.131351147911178e-05, "loss": 0.0969, "step": 115400 }, { "epoch": 43.47, "learning_rate": 1.1305984192698533e-05, "loss": 0.097, "step": 115500 }, { "epoch": 43.51, "learning_rate": 1.1298456906285284e-05, "loss": 0.0977, "step": 115600 }, { "epoch": 43.55, "learning_rate": 1.1290929619872038e-05, "loss": 0.0981, "step": 115700 }, { "epoch": 43.58, "learning_rate": 1.1283402333458789e-05, "loss": 0.0973, "step": 115800 }, { "epoch": 43.62, "learning_rate": 1.1275875047045541e-05, "loss": 0.0968, "step": 115900 }, { "epoch": 43.66, "learning_rate": 1.1268347760632292e-05, "loss": 0.0962, "step": 116000 }, { "epoch": 43.7, "learning_rate": 1.1260820474219045e-05, "loss": 0.098, "step": 116100 }, { "epoch": 43.73, "learning_rate": 1.1253293187805795e-05, "loss": 0.0974, "step": 116200 }, { "epoch": 43.77, "learning_rate": 1.124576590139255e-05, "loss": 0.0957, "step": 116300 }, { "epoch": 43.81, "learning_rate": 1.12382386149793e-05, "loss": 0.0974, "step": 116400 }, { "epoch": 43.85, "learning_rate": 1.1230711328566053e-05, "loss": 0.096, "step": 116500 }, { "epoch": 43.88, "learning_rate": 1.1223184042152804e-05, "loss": 0.0979, "step": 116600 }, { "epoch": 43.92, "learning_rate": 1.1215656755739556e-05, "loss": 0.0958, "step": 116700 }, { "epoch": 43.96, "learning_rate": 1.120812946932631e-05, "loss": 0.097, "step": 116800 }, { "epoch": 44.0, "learning_rate": 1.1200602182913061e-05, "loss": 0.0991, "step": 116900 }, { "epoch": 44.0, "eval_loss": 0.09703505784273148, "eval_runtime": 45.5627, "eval_samples_per_second": 164.608, "eval_steps_per_second": 10.294, "step": 116908 }, { "epoch": 44.03, "learning_rate": 1.1193074896499814e-05, "loss": 0.0984, "step": 117000 }, { "epoch": 44.07, "learning_rate": 1.1185547610086565e-05, "loss": 0.0965, "step": 117100 }, { "epoch": 44.11, "learning_rate": 1.1178020323673317e-05, "loss": 0.0957, "step": 117200 }, { "epoch": 44.15, "learning_rate": 1.1170493037260068e-05, "loss": 0.0971, "step": 117300 }, { "epoch": 44.19, "learning_rate": 1.1162965750846822e-05, "loss": 0.096, "step": 117400 }, { "epoch": 44.22, "learning_rate": 1.1155438464433573e-05, "loss": 0.0959, "step": 117500 }, { "epoch": 44.26, "learning_rate": 1.1147911178020326e-05, "loss": 0.0965, "step": 117600 }, { "epoch": 44.3, "learning_rate": 1.1140383891607076e-05, "loss": 0.0987, "step": 117700 }, { "epoch": 44.34, "learning_rate": 1.1132856605193829e-05, "loss": 0.0972, "step": 117800 }, { "epoch": 44.37, "learning_rate": 1.112532931878058e-05, "loss": 0.0962, "step": 117900 }, { "epoch": 44.41, "learning_rate": 1.1117802032367332e-05, "loss": 0.0956, "step": 118000 }, { "epoch": 44.45, "learning_rate": 1.1110274745954083e-05, "loss": 0.0974, "step": 118100 }, { "epoch": 44.49, "learning_rate": 1.1102747459540837e-05, "loss": 0.0973, "step": 118200 }, { "epoch": 44.52, "learning_rate": 1.1095220173127588e-05, "loss": 0.0952, "step": 118300 }, { "epoch": 44.56, "learning_rate": 1.108769288671434e-05, "loss": 0.0968, "step": 118400 }, { "epoch": 44.6, "learning_rate": 1.1080165600301091e-05, "loss": 0.0971, "step": 118500 }, { "epoch": 44.64, "learning_rate": 1.1072638313887844e-05, "loss": 0.1, "step": 118600 }, { "epoch": 44.67, "learning_rate": 1.1065111027474595e-05, "loss": 0.0966, "step": 118700 }, { "epoch": 44.71, "learning_rate": 1.1057583741061349e-05, "loss": 0.0973, "step": 118800 }, { "epoch": 44.75, "learning_rate": 1.10500564546481e-05, "loss": 0.0957, "step": 118900 }, { "epoch": 44.79, "learning_rate": 1.1042529168234852e-05, "loss": 0.0964, "step": 119000 }, { "epoch": 44.82, "learning_rate": 1.1035001881821603e-05, "loss": 0.0954, "step": 119100 }, { "epoch": 44.86, "learning_rate": 1.1027474595408356e-05, "loss": 0.0971, "step": 119200 }, { "epoch": 44.9, "learning_rate": 1.101994730899511e-05, "loss": 0.0973, "step": 119300 }, { "epoch": 44.94, "learning_rate": 1.101242002258186e-05, "loss": 0.0975, "step": 119400 }, { "epoch": 44.98, "learning_rate": 1.1004892736168613e-05, "loss": 0.0979, "step": 119500 }, { "epoch": 45.0, "eval_loss": 0.09719178825616837, "eval_runtime": 45.3083, "eval_samples_per_second": 165.532, "eval_steps_per_second": 10.351, "step": 119565 }, { "epoch": 45.01, "learning_rate": 1.0997365449755364e-05, "loss": 0.0972, "step": 119600 }, { "epoch": 45.05, "learning_rate": 1.0989838163342116e-05, "loss": 0.0977, "step": 119700 }, { "epoch": 45.09, "learning_rate": 1.0982310876928867e-05, "loss": 0.0973, "step": 119800 }, { "epoch": 45.13, "learning_rate": 1.0974783590515621e-05, "loss": 0.0954, "step": 119900 }, { "epoch": 45.16, "learning_rate": 1.0967256304102372e-05, "loss": 0.0969, "step": 120000 }, { "epoch": 45.2, "learning_rate": 1.0959729017689125e-05, "loss": 0.0958, "step": 120100 }, { "epoch": 45.24, "learning_rate": 1.0952201731275876e-05, "loss": 0.0964, "step": 120200 }, { "epoch": 45.28, "learning_rate": 1.0944674444862628e-05, "loss": 0.0961, "step": 120300 }, { "epoch": 45.31, "learning_rate": 1.0937147158449379e-05, "loss": 0.0968, "step": 120400 }, { "epoch": 45.35, "learning_rate": 1.0929619872036133e-05, "loss": 0.0973, "step": 120500 }, { "epoch": 45.39, "learning_rate": 1.0922092585622884e-05, "loss": 0.0983, "step": 120600 }, { "epoch": 45.43, "learning_rate": 1.0914565299209637e-05, "loss": 0.0979, "step": 120700 }, { "epoch": 45.46, "learning_rate": 1.0907038012796387e-05, "loss": 0.0959, "step": 120800 }, { "epoch": 45.5, "learning_rate": 1.089951072638314e-05, "loss": 0.0967, "step": 120900 }, { "epoch": 45.54, "learning_rate": 1.089198343996989e-05, "loss": 0.0978, "step": 121000 }, { "epoch": 45.58, "learning_rate": 1.0884456153556643e-05, "loss": 0.0966, "step": 121100 }, { "epoch": 45.62, "learning_rate": 1.0876928867143394e-05, "loss": 0.0967, "step": 121200 }, { "epoch": 45.65, "learning_rate": 1.0869401580730148e-05, "loss": 0.0981, "step": 121300 }, { "epoch": 45.69, "learning_rate": 1.0861874294316899e-05, "loss": 0.0955, "step": 121400 }, { "epoch": 45.73, "learning_rate": 1.0854347007903652e-05, "loss": 0.0981, "step": 121500 }, { "epoch": 45.77, "learning_rate": 1.0846819721490402e-05, "loss": 0.0963, "step": 121600 }, { "epoch": 45.8, "learning_rate": 1.0839292435077155e-05, "loss": 0.096, "step": 121700 }, { "epoch": 45.84, "learning_rate": 1.0831765148663909e-05, "loss": 0.0969, "step": 121800 }, { "epoch": 45.88, "learning_rate": 1.082423786225066e-05, "loss": 0.0959, "step": 121900 }, { "epoch": 45.92, "learning_rate": 1.0816710575837412e-05, "loss": 0.0969, "step": 122000 }, { "epoch": 45.95, "learning_rate": 1.0809183289424163e-05, "loss": 0.0968, "step": 122100 }, { "epoch": 45.99, "learning_rate": 1.0801656003010916e-05, "loss": 0.097, "step": 122200 }, { "epoch": 46.0, "eval_loss": 0.09704454988241196, "eval_runtime": 45.4128, "eval_samples_per_second": 165.152, "eval_steps_per_second": 10.327, "step": 122222 }, { "epoch": 46.03, "learning_rate": 1.0794128716597667e-05, "loss": 0.0977, "step": 122300 }, { "epoch": 46.07, "learning_rate": 1.078660143018442e-05, "loss": 0.0966, "step": 122400 }, { "epoch": 46.1, "learning_rate": 1.0779074143771172e-05, "loss": 0.0972, "step": 122500 }, { "epoch": 46.14, "learning_rate": 1.0771546857357924e-05, "loss": 0.0965, "step": 122600 }, { "epoch": 46.18, "learning_rate": 1.0764019570944675e-05, "loss": 0.0959, "step": 122700 }, { "epoch": 46.22, "learning_rate": 1.0756492284531427e-05, "loss": 0.0955, "step": 122800 }, { "epoch": 46.26, "learning_rate": 1.0748964998118178e-05, "loss": 0.0963, "step": 122900 }, { "epoch": 46.29, "learning_rate": 1.0741437711704932e-05, "loss": 0.0965, "step": 123000 }, { "epoch": 46.33, "learning_rate": 1.0733910425291683e-05, "loss": 0.0966, "step": 123100 }, { "epoch": 46.37, "learning_rate": 1.0726383138878436e-05, "loss": 0.0959, "step": 123200 }, { "epoch": 46.41, "learning_rate": 1.0718855852465187e-05, "loss": 0.0975, "step": 123300 }, { "epoch": 46.44, "learning_rate": 1.0711328566051939e-05, "loss": 0.0957, "step": 123400 }, { "epoch": 46.48, "learning_rate": 1.070380127963869e-05, "loss": 0.0966, "step": 123500 }, { "epoch": 46.52, "learning_rate": 1.0696273993225444e-05, "loss": 0.0973, "step": 123600 }, { "epoch": 46.56, "learning_rate": 1.0688746706812195e-05, "loss": 0.0979, "step": 123700 }, { "epoch": 46.59, "learning_rate": 1.0681219420398947e-05, "loss": 0.0983, "step": 123800 }, { "epoch": 46.63, "learning_rate": 1.0673692133985698e-05, "loss": 0.0969, "step": 123900 }, { "epoch": 46.67, "learning_rate": 1.066616484757245e-05, "loss": 0.097, "step": 124000 }, { "epoch": 46.71, "learning_rate": 1.0658637561159202e-05, "loss": 0.0969, "step": 124100 }, { "epoch": 46.74, "learning_rate": 1.0651110274745954e-05, "loss": 0.0967, "step": 124200 }, { "epoch": 46.78, "learning_rate": 1.0643582988332708e-05, "loss": 0.0955, "step": 124300 }, { "epoch": 46.82, "learning_rate": 1.063605570191946e-05, "loss": 0.0967, "step": 124400 }, { "epoch": 46.86, "learning_rate": 1.0628528415506212e-05, "loss": 0.0972, "step": 124500 }, { "epoch": 46.89, "learning_rate": 1.0621001129092963e-05, "loss": 0.0975, "step": 124600 }, { "epoch": 46.93, "learning_rate": 1.0613473842679715e-05, "loss": 0.0956, "step": 124700 }, { "epoch": 46.97, "learning_rate": 1.0605946556266466e-05, "loss": 0.0936, "step": 124800 }, { "epoch": 47.0, "eval_loss": 0.096713587641716, "eval_runtime": 45.4283, "eval_samples_per_second": 165.095, "eval_steps_per_second": 10.324, "step": 124879 }, { "epoch": 47.01, "learning_rate": 1.059841926985322e-05, "loss": 0.0966, "step": 124900 }, { "epoch": 47.05, "learning_rate": 1.0590891983439971e-05, "loss": 0.0958, "step": 125000 }, { "epoch": 47.08, "learning_rate": 1.0583364697026723e-05, "loss": 0.0961, "step": 125100 }, { "epoch": 47.12, "learning_rate": 1.0575837410613474e-05, "loss": 0.0955, "step": 125200 }, { "epoch": 47.16, "learning_rate": 1.0568310124200227e-05, "loss": 0.0958, "step": 125300 }, { "epoch": 47.2, "learning_rate": 1.0560782837786978e-05, "loss": 0.0961, "step": 125400 }, { "epoch": 47.23, "learning_rate": 1.0553255551373732e-05, "loss": 0.0959, "step": 125500 }, { "epoch": 47.27, "learning_rate": 1.0545728264960483e-05, "loss": 0.0954, "step": 125600 }, { "epoch": 47.31, "learning_rate": 1.0538200978547235e-05, "loss": 0.0992, "step": 125700 }, { "epoch": 47.35, "learning_rate": 1.0530673692133986e-05, "loss": 0.0967, "step": 125800 }, { "epoch": 47.38, "learning_rate": 1.0523146405720738e-05, "loss": 0.0957, "step": 125900 }, { "epoch": 47.42, "learning_rate": 1.051561911930749e-05, "loss": 0.0963, "step": 126000 }, { "epoch": 47.46, "learning_rate": 1.0508091832894243e-05, "loss": 0.0959, "step": 126100 }, { "epoch": 47.5, "learning_rate": 1.0500564546480994e-05, "loss": 0.0956, "step": 126200 }, { "epoch": 47.53, "learning_rate": 1.0493037260067747e-05, "loss": 0.0976, "step": 126300 }, { "epoch": 47.57, "learning_rate": 1.0485509973654498e-05, "loss": 0.0981, "step": 126400 }, { "epoch": 47.61, "learning_rate": 1.047798268724125e-05, "loss": 0.0982, "step": 126500 }, { "epoch": 47.65, "learning_rate": 1.0470455400828001e-05, "loss": 0.0959, "step": 126600 }, { "epoch": 47.69, "learning_rate": 1.0462928114414755e-05, "loss": 0.098, "step": 126700 }, { "epoch": 47.72, "learning_rate": 1.0455400828001508e-05, "loss": 0.0958, "step": 126800 }, { "epoch": 47.76, "learning_rate": 1.0447873541588258e-05, "loss": 0.0977, "step": 126900 }, { "epoch": 47.8, "learning_rate": 1.0440346255175011e-05, "loss": 0.0944, "step": 127000 }, { "epoch": 47.84, "learning_rate": 1.0432818968761762e-05, "loss": 0.0953, "step": 127100 }, { "epoch": 47.87, "learning_rate": 1.0425291682348514e-05, "loss": 0.0949, "step": 127200 }, { "epoch": 47.91, "learning_rate": 1.0417764395935265e-05, "loss": 0.0957, "step": 127300 }, { "epoch": 47.95, "learning_rate": 1.041023710952202e-05, "loss": 0.0976, "step": 127400 }, { "epoch": 47.99, "learning_rate": 1.040270982310877e-05, "loss": 0.0948, "step": 127500 }, { "epoch": 48.0, "eval_loss": 0.0966743603348732, "eval_runtime": 45.5618, "eval_samples_per_second": 164.611, "eval_steps_per_second": 10.294, "step": 127536 }, { "epoch": 48.02, "learning_rate": 1.0395182536695523e-05, "loss": 0.096, "step": 127600 }, { "epoch": 48.06, "learning_rate": 1.0387655250282273e-05, "loss": 0.0958, "step": 127700 }, { "epoch": 48.1, "learning_rate": 1.0380127963869026e-05, "loss": 0.0963, "step": 127800 }, { "epoch": 48.14, "learning_rate": 1.0372600677455777e-05, "loss": 0.095, "step": 127900 }, { "epoch": 48.17, "learning_rate": 1.0365073391042531e-05, "loss": 0.0973, "step": 128000 }, { "epoch": 48.21, "learning_rate": 1.0357546104629282e-05, "loss": 0.0958, "step": 128100 }, { "epoch": 48.25, "learning_rate": 1.0350018818216034e-05, "loss": 0.094, "step": 128200 }, { "epoch": 48.29, "learning_rate": 1.0342491531802785e-05, "loss": 0.0965, "step": 128300 }, { "epoch": 48.33, "learning_rate": 1.0334964245389538e-05, "loss": 0.0962, "step": 128400 }, { "epoch": 48.36, "learning_rate": 1.0327436958976289e-05, "loss": 0.0973, "step": 128500 }, { "epoch": 48.4, "learning_rate": 1.0319909672563043e-05, "loss": 0.0966, "step": 128600 }, { "epoch": 48.44, "learning_rate": 1.0312382386149794e-05, "loss": 0.0967, "step": 128700 }, { "epoch": 48.48, "learning_rate": 1.0304855099736546e-05, "loss": 0.0983, "step": 128800 }, { "epoch": 48.51, "learning_rate": 1.0297327813323297e-05, "loss": 0.0953, "step": 128900 }, { "epoch": 48.55, "learning_rate": 1.028980052691005e-05, "loss": 0.0951, "step": 129000 }, { "epoch": 48.59, "learning_rate": 1.02822732404968e-05, "loss": 0.0988, "step": 129100 }, { "epoch": 48.63, "learning_rate": 1.0274745954083554e-05, "loss": 0.095, "step": 129200 }, { "epoch": 48.66, "learning_rate": 1.0267218667670307e-05, "loss": 0.0956, "step": 129300 }, { "epoch": 48.7, "learning_rate": 1.0259691381257058e-05, "loss": 0.0962, "step": 129400 }, { "epoch": 48.74, "learning_rate": 1.025216409484381e-05, "loss": 0.0964, "step": 129500 }, { "epoch": 48.78, "learning_rate": 1.0244636808430561e-05, "loss": 0.0952, "step": 129600 }, { "epoch": 48.81, "learning_rate": 1.0237109522017315e-05, "loss": 0.0959, "step": 129700 }, { "epoch": 48.85, "learning_rate": 1.0229582235604066e-05, "loss": 0.0965, "step": 129800 }, { "epoch": 48.89, "learning_rate": 1.0222054949190819e-05, "loss": 0.0949, "step": 129900 }, { "epoch": 48.93, "learning_rate": 1.021452766277757e-05, "loss": 0.0966, "step": 130000 }, { "epoch": 48.96, "learning_rate": 1.0207000376364322e-05, "loss": 0.0974, "step": 130100 }, { "epoch": 49.0, "eval_loss": 0.09543051570653915, "eval_runtime": 45.608, "eval_samples_per_second": 164.445, "eval_steps_per_second": 10.283, "step": 130193 }, { "epoch": 49.0, "learning_rate": 1.0199473089951073e-05, "loss": 0.0959, "step": 130200 }, { "epoch": 49.04, "learning_rate": 1.0191945803537825e-05, "loss": 0.0962, "step": 130300 }, { "epoch": 49.08, "learning_rate": 1.0184418517124576e-05, "loss": 0.0954, "step": 130400 }, { "epoch": 49.12, "learning_rate": 1.017689123071133e-05, "loss": 0.0959, "step": 130500 }, { "epoch": 49.15, "learning_rate": 1.0169363944298081e-05, "loss": 0.0949, "step": 130600 }, { "epoch": 49.19, "learning_rate": 1.0161836657884834e-05, "loss": 0.0967, "step": 130700 }, { "epoch": 49.23, "learning_rate": 1.0154309371471584e-05, "loss": 0.0965, "step": 130800 }, { "epoch": 49.27, "learning_rate": 1.0146782085058337e-05, "loss": 0.0963, "step": 130900 }, { "epoch": 49.3, "learning_rate": 1.0139254798645088e-05, "loss": 0.0956, "step": 131000 }, { "epoch": 49.34, "learning_rate": 1.0131727512231842e-05, "loss": 0.0959, "step": 131100 }, { "epoch": 49.38, "learning_rate": 1.0124200225818593e-05, "loss": 0.0958, "step": 131200 }, { "epoch": 49.42, "learning_rate": 1.0116672939405345e-05, "loss": 0.0974, "step": 131300 }, { "epoch": 49.45, "learning_rate": 1.0109145652992096e-05, "loss": 0.0984, "step": 131400 }, { "epoch": 49.49, "learning_rate": 1.0101618366578849e-05, "loss": 0.0946, "step": 131500 }, { "epoch": 49.53, "learning_rate": 1.00940910801656e-05, "loss": 0.0967, "step": 131600 }, { "epoch": 49.57, "learning_rate": 1.0086563793752354e-05, "loss": 0.0968, "step": 131700 }, { "epoch": 49.6, "learning_rate": 1.0079036507339106e-05, "loss": 0.0961, "step": 131800 }, { "epoch": 49.64, "learning_rate": 1.0071509220925857e-05, "loss": 0.0946, "step": 131900 }, { "epoch": 49.68, "learning_rate": 1.006398193451261e-05, "loss": 0.0959, "step": 132000 }, { "epoch": 49.72, "learning_rate": 1.005645464809936e-05, "loss": 0.0959, "step": 132100 }, { "epoch": 49.76, "learning_rate": 1.0048927361686115e-05, "loss": 0.0974, "step": 132200 }, { "epoch": 49.79, "learning_rate": 1.0041400075272865e-05, "loss": 0.095, "step": 132300 }, { "epoch": 49.83, "learning_rate": 1.0033872788859618e-05, "loss": 0.0954, "step": 132400 }, { "epoch": 49.87, "learning_rate": 1.0026345502446369e-05, "loss": 0.0958, "step": 132500 }, { "epoch": 49.91, "learning_rate": 1.0018818216033121e-05, "loss": 0.0959, "step": 132600 }, { "epoch": 49.94, "learning_rate": 1.0011290929619872e-05, "loss": 0.095, "step": 132700 }, { "epoch": 49.98, "learning_rate": 1.0003763643206625e-05, "loss": 0.0958, "step": 132800 }, { "epoch": 50.0, "eval_loss": 0.09539712220430374, "eval_runtime": 45.0771, "eval_samples_per_second": 166.382, "eval_steps_per_second": 10.404, "step": 132850 }, { "epoch": 50.02, "learning_rate": 9.996236356793377e-06, "loss": 0.0956, "step": 132900 }, { "epoch": 50.06, "learning_rate": 9.98870907038013e-06, "loss": 0.0943, "step": 133000 }, { "epoch": 50.09, "learning_rate": 9.98118178396688e-06, "loss": 0.0958, "step": 133100 }, { "epoch": 50.13, "learning_rate": 9.973654497553633e-06, "loss": 0.0952, "step": 133200 }, { "epoch": 50.17, "learning_rate": 9.966127211140385e-06, "loss": 0.0969, "step": 133300 }, { "epoch": 50.21, "learning_rate": 9.958599924727136e-06, "loss": 0.0952, "step": 133400 }, { "epoch": 50.24, "learning_rate": 9.951072638313889e-06, "loss": 0.0959, "step": 133500 }, { "epoch": 50.28, "learning_rate": 9.943545351900641e-06, "loss": 0.0949, "step": 133600 }, { "epoch": 50.32, "learning_rate": 9.936018065487392e-06, "loss": 0.0964, "step": 133700 }, { "epoch": 50.36, "learning_rate": 9.928490779074145e-06, "loss": 0.0952, "step": 133800 }, { "epoch": 50.4, "learning_rate": 9.920963492660897e-06, "loss": 0.0944, "step": 133900 }, { "epoch": 50.43, "learning_rate": 9.913436206247648e-06, "loss": 0.0973, "step": 134000 }, { "epoch": 50.47, "learning_rate": 9.9059089198344e-06, "loss": 0.0949, "step": 134100 }, { "epoch": 50.51, "learning_rate": 9.898381633421153e-06, "loss": 0.096, "step": 134200 }, { "epoch": 50.55, "learning_rate": 9.890854347007904e-06, "loss": 0.0962, "step": 134300 }, { "epoch": 50.58, "learning_rate": 9.883327060594656e-06, "loss": 0.0972, "step": 134400 }, { "epoch": 50.62, "learning_rate": 9.875799774181409e-06, "loss": 0.0931, "step": 134500 }, { "epoch": 50.66, "learning_rate": 9.86827248776816e-06, "loss": 0.0961, "step": 134600 }, { "epoch": 50.7, "learning_rate": 9.860745201354912e-06, "loss": 0.0947, "step": 134700 }, { "epoch": 50.73, "learning_rate": 9.853217914941665e-06, "loss": 0.0968, "step": 134800 }, { "epoch": 50.77, "learning_rate": 9.845690628528415e-06, "loss": 0.095, "step": 134900 }, { "epoch": 50.81, "learning_rate": 9.838163342115168e-06, "loss": 0.0966, "step": 135000 }, { "epoch": 50.85, "learning_rate": 9.83063605570192e-06, "loss": 0.0966, "step": 135100 }, { "epoch": 50.88, "learning_rate": 9.823108769288671e-06, "loss": 0.0957, "step": 135200 }, { "epoch": 50.92, "learning_rate": 9.815581482875424e-06, "loss": 0.0948, "step": 135300 }, { "epoch": 50.96, "learning_rate": 9.808054196462176e-06, "loss": 0.0942, "step": 135400 }, { "epoch": 51.0, "learning_rate": 9.800526910048929e-06, "loss": 0.0948, "step": 135500 }, { "epoch": 51.0, "eval_loss": 0.09547575563192368, "eval_runtime": 45.2525, "eval_samples_per_second": 165.737, "eval_steps_per_second": 10.364, "step": 135507 }, { "epoch": 51.04, "learning_rate": 9.792999623635681e-06, "loss": 0.0973, "step": 135600 }, { "epoch": 51.07, "learning_rate": 9.785472337222432e-06, "loss": 0.0962, "step": 135700 }, { "epoch": 51.11, "learning_rate": 9.777945050809185e-06, "loss": 0.0957, "step": 135800 }, { "epoch": 51.15, "learning_rate": 9.770417764395936e-06, "loss": 0.0953, "step": 135900 }, { "epoch": 51.19, "learning_rate": 9.762890477982688e-06, "loss": 0.0948, "step": 136000 }, { "epoch": 51.22, "learning_rate": 9.75536319156944e-06, "loss": 0.0958, "step": 136100 }, { "epoch": 51.26, "learning_rate": 9.747835905156191e-06, "loss": 0.0956, "step": 136200 }, { "epoch": 51.3, "learning_rate": 9.740308618742944e-06, "loss": 0.0957, "step": 136300 }, { "epoch": 51.34, "learning_rate": 9.732781332329696e-06, "loss": 0.0953, "step": 136400 }, { "epoch": 51.37, "learning_rate": 9.725254045916447e-06, "loss": 0.0942, "step": 136500 }, { "epoch": 51.41, "learning_rate": 9.7177267595032e-06, "loss": 0.0952, "step": 136600 }, { "epoch": 51.45, "learning_rate": 9.710199473089952e-06, "loss": 0.0949, "step": 136700 }, { "epoch": 51.49, "learning_rate": 9.702672186676703e-06, "loss": 0.0961, "step": 136800 }, { "epoch": 51.52, "learning_rate": 9.695144900263456e-06, "loss": 0.0949, "step": 136900 }, { "epoch": 51.56, "learning_rate": 9.687617613850208e-06, "loss": 0.0955, "step": 137000 }, { "epoch": 51.6, "learning_rate": 9.680090327436959e-06, "loss": 0.0954, "step": 137100 }, { "epoch": 51.64, "learning_rate": 9.672563041023711e-06, "loss": 0.0958, "step": 137200 }, { "epoch": 51.67, "learning_rate": 9.665035754610464e-06, "loss": 0.0961, "step": 137300 }, { "epoch": 51.71, "learning_rate": 9.657508468197215e-06, "loss": 0.0971, "step": 137400 }, { "epoch": 51.75, "learning_rate": 9.649981181783967e-06, "loss": 0.0952, "step": 137500 }, { "epoch": 51.79, "learning_rate": 9.64245389537072e-06, "loss": 0.0961, "step": 137600 }, { "epoch": 51.83, "learning_rate": 9.63492660895747e-06, "loss": 0.0963, "step": 137700 }, { "epoch": 51.86, "learning_rate": 9.627399322544223e-06, "loss": 0.0952, "step": 137800 }, { "epoch": 51.9, "learning_rate": 9.619872036130976e-06, "loss": 0.0944, "step": 137900 }, { "epoch": 51.94, "learning_rate": 9.612344749717728e-06, "loss": 0.0952, "step": 138000 }, { "epoch": 51.98, "learning_rate": 9.60481746330448e-06, "loss": 0.095, "step": 138100 }, { "epoch": 52.0, "eval_loss": 0.0952862873673439, "eval_runtime": 45.261, "eval_samples_per_second": 165.706, "eval_steps_per_second": 10.362, "step": 138164 }, { "epoch": 52.01, "learning_rate": 9.597290176891231e-06, "loss": 0.0958, "step": 138200 }, { "epoch": 52.05, "learning_rate": 9.589762890477984e-06, "loss": 0.0955, "step": 138300 }, { "epoch": 52.09, "learning_rate": 9.582235604064737e-06, "loss": 0.0959, "step": 138400 }, { "epoch": 52.13, "learning_rate": 9.574708317651487e-06, "loss": 0.0971, "step": 138500 }, { "epoch": 52.16, "learning_rate": 9.56718103123824e-06, "loss": 0.0952, "step": 138600 }, { "epoch": 52.2, "learning_rate": 9.559653744824992e-06, "loss": 0.0955, "step": 138700 }, { "epoch": 52.24, "learning_rate": 9.552126458411743e-06, "loss": 0.0946, "step": 138800 }, { "epoch": 52.28, "learning_rate": 9.544599171998496e-06, "loss": 0.0965, "step": 138900 }, { "epoch": 52.31, "learning_rate": 9.537071885585247e-06, "loss": 0.0941, "step": 139000 }, { "epoch": 52.35, "learning_rate": 9.529544599171999e-06, "loss": 0.096, "step": 139100 }, { "epoch": 52.39, "learning_rate": 9.522017312758752e-06, "loss": 0.0928, "step": 139200 }, { "epoch": 52.43, "learning_rate": 9.514490026345502e-06, "loss": 0.096, "step": 139300 }, { "epoch": 52.47, "learning_rate": 9.506962739932255e-06, "loss": 0.0953, "step": 139400 }, { "epoch": 52.5, "learning_rate": 9.499435453519007e-06, "loss": 0.0943, "step": 139500 }, { "epoch": 52.54, "learning_rate": 9.491908167105758e-06, "loss": 0.0948, "step": 139600 }, { "epoch": 52.58, "learning_rate": 9.48438088069251e-06, "loss": 0.0953, "step": 139700 }, { "epoch": 52.62, "learning_rate": 9.476853594279263e-06, "loss": 0.0952, "step": 139800 }, { "epoch": 52.65, "learning_rate": 9.469326307866014e-06, "loss": 0.0934, "step": 139900 }, { "epoch": 52.69, "learning_rate": 9.461799021452767e-06, "loss": 0.0965, "step": 140000 }, { "epoch": 52.73, "learning_rate": 9.454271735039519e-06, "loss": 0.0962, "step": 140100 }, { "epoch": 52.77, "learning_rate": 9.44674444862627e-06, "loss": 0.0954, "step": 140200 }, { "epoch": 52.8, "learning_rate": 9.439217162213024e-06, "loss": 0.095, "step": 140300 }, { "epoch": 52.84, "learning_rate": 9.431689875799775e-06, "loss": 0.0941, "step": 140400 }, { "epoch": 52.88, "learning_rate": 9.424162589386527e-06, "loss": 0.0953, "step": 140500 }, { "epoch": 52.92, "learning_rate": 9.41663530297328e-06, "loss": 0.0963, "step": 140600 }, { "epoch": 52.95, "learning_rate": 9.40910801656003e-06, "loss": 0.0958, "step": 140700 }, { "epoch": 52.99, "learning_rate": 9.401580730146783e-06, "loss": 0.0939, "step": 140800 }, { "epoch": 53.0, "eval_loss": 0.09453196078538895, "eval_runtime": 45.2882, "eval_samples_per_second": 165.606, "eval_steps_per_second": 10.356, "step": 140821 }, { "epoch": 53.03, "learning_rate": 9.394053443733536e-06, "loss": 0.095, "step": 140900 }, { "epoch": 53.07, "learning_rate": 9.386526157320287e-06, "loss": 0.0955, "step": 141000 }, { "epoch": 53.11, "learning_rate": 9.378998870907039e-06, "loss": 0.0944, "step": 141100 }, { "epoch": 53.14, "learning_rate": 9.371471584493792e-06, "loss": 0.0953, "step": 141200 }, { "epoch": 53.18, "learning_rate": 9.363944298080542e-06, "loss": 0.0945, "step": 141300 }, { "epoch": 53.22, "learning_rate": 9.356417011667295e-06, "loss": 0.0959, "step": 141400 }, { "epoch": 53.26, "learning_rate": 9.348889725254047e-06, "loss": 0.0938, "step": 141500 }, { "epoch": 53.29, "learning_rate": 9.341362438840798e-06, "loss": 0.0956, "step": 141600 }, { "epoch": 53.33, "learning_rate": 9.33383515242755e-06, "loss": 0.0962, "step": 141700 }, { "epoch": 53.37, "learning_rate": 9.326307866014303e-06, "loss": 0.0959, "step": 141800 }, { "epoch": 53.41, "learning_rate": 9.318780579601054e-06, "loss": 0.0953, "step": 141900 }, { "epoch": 53.44, "learning_rate": 9.311253293187807e-06, "loss": 0.0952, "step": 142000 }, { "epoch": 53.48, "learning_rate": 9.303726006774557e-06, "loss": 0.0945, "step": 142100 }, { "epoch": 53.52, "learning_rate": 9.29619872036131e-06, "loss": 0.0958, "step": 142200 }, { "epoch": 53.56, "learning_rate": 9.288671433948063e-06, "loss": 0.0949, "step": 142300 }, { "epoch": 53.59, "learning_rate": 9.281144147534813e-06, "loss": 0.0943, "step": 142400 }, { "epoch": 53.63, "learning_rate": 9.273616861121566e-06, "loss": 0.0957, "step": 142500 }, { "epoch": 53.67, "learning_rate": 9.266089574708318e-06, "loss": 0.0949, "step": 142600 }, { "epoch": 53.71, "learning_rate": 9.25856228829507e-06, "loss": 0.0939, "step": 142700 }, { "epoch": 53.74, "learning_rate": 9.251035001881823e-06, "loss": 0.0942, "step": 142800 }, { "epoch": 53.78, "learning_rate": 9.243507715468574e-06, "loss": 0.0955, "step": 142900 }, { "epoch": 53.82, "learning_rate": 9.235980429055327e-06, "loss": 0.0935, "step": 143000 }, { "epoch": 53.86, "learning_rate": 9.22845314264208e-06, "loss": 0.0952, "step": 143100 }, { "epoch": 53.9, "learning_rate": 9.22092585622883e-06, "loss": 0.0946, "step": 143200 }, { "epoch": 53.93, "learning_rate": 9.213398569815583e-06, "loss": 0.0971, "step": 143300 }, { "epoch": 53.97, "learning_rate": 9.205871283402335e-06, "loss": 0.0961, "step": 143400 }, { "epoch": 54.0, "eval_loss": 0.09483154118061066, "eval_runtime": 45.4413, "eval_samples_per_second": 165.048, "eval_steps_per_second": 10.321, "step": 143478 }, { "epoch": 54.01, "learning_rate": 9.198343996989086e-06, "loss": 0.0943, "step": 143500 }, { "epoch": 54.05, "learning_rate": 9.190816710575838e-06, "loss": 0.097, "step": 143600 }, { "epoch": 54.08, "learning_rate": 9.183289424162591e-06, "loss": 0.0956, "step": 143700 }, { "epoch": 54.12, "learning_rate": 9.175762137749342e-06, "loss": 0.0945, "step": 143800 }, { "epoch": 54.16, "learning_rate": 9.168234851336094e-06, "loss": 0.0968, "step": 143900 }, { "epoch": 54.2, "learning_rate": 9.160707564922847e-06, "loss": 0.0955, "step": 144000 }, { "epoch": 54.23, "learning_rate": 9.153180278509598e-06, "loss": 0.0952, "step": 144100 }, { "epoch": 54.27, "learning_rate": 9.14565299209635e-06, "loss": 0.0941, "step": 144200 }, { "epoch": 54.31, "learning_rate": 9.138125705683103e-06, "loss": 0.0935, "step": 144300 }, { "epoch": 54.35, "learning_rate": 9.130598419269853e-06, "loss": 0.0949, "step": 144400 }, { "epoch": 54.38, "learning_rate": 9.123071132856606e-06, "loss": 0.0957, "step": 144500 }, { "epoch": 54.42, "learning_rate": 9.115543846443358e-06, "loss": 0.0958, "step": 144600 }, { "epoch": 54.46, "learning_rate": 9.10801656003011e-06, "loss": 0.0932, "step": 144700 }, { "epoch": 54.5, "learning_rate": 9.100489273616862e-06, "loss": 0.0945, "step": 144800 }, { "epoch": 54.54, "learning_rate": 9.092961987203613e-06, "loss": 0.0956, "step": 144900 }, { "epoch": 54.57, "learning_rate": 9.085434700790365e-06, "loss": 0.0949, "step": 145000 }, { "epoch": 54.61, "learning_rate": 9.077907414377118e-06, "loss": 0.0942, "step": 145100 }, { "epoch": 54.65, "learning_rate": 9.070380127963868e-06, "loss": 0.0949, "step": 145200 }, { "epoch": 54.69, "learning_rate": 9.062852841550623e-06, "loss": 0.0947, "step": 145300 }, { "epoch": 54.72, "learning_rate": 9.055325555137373e-06, "loss": 0.0952, "step": 145400 }, { "epoch": 54.76, "learning_rate": 9.047798268724126e-06, "loss": 0.096, "step": 145500 }, { "epoch": 54.8, "learning_rate": 9.040270982310879e-06, "loss": 0.0953, "step": 145600 }, { "epoch": 54.84, "learning_rate": 9.03274369589763e-06, "loss": 0.094, "step": 145700 }, { "epoch": 54.87, "learning_rate": 9.025216409484382e-06, "loss": 0.0936, "step": 145800 }, { "epoch": 54.91, "learning_rate": 9.017689123071134e-06, "loss": 0.0951, "step": 145900 }, { "epoch": 54.95, "learning_rate": 9.010161836657885e-06, "loss": 0.0959, "step": 146000 }, { "epoch": 54.99, "learning_rate": 9.002634550244638e-06, "loss": 0.0964, "step": 146100 }, { "epoch": 55.0, "eval_loss": 0.09549073874950409, "eval_runtime": 45.2457, "eval_samples_per_second": 165.762, "eval_steps_per_second": 10.366, "step": 146135 }, { "epoch": 55.02, "learning_rate": 8.99510726383139e-06, "loss": 0.0963, "step": 146200 }, { "epoch": 55.06, "learning_rate": 8.987579977418141e-06, "loss": 0.0962, "step": 146300 }, { "epoch": 55.1, "learning_rate": 8.980052691004894e-06, "loss": 0.0954, "step": 146400 }, { "epoch": 55.14, "learning_rate": 8.972525404591646e-06, "loss": 0.0934, "step": 146500 }, { "epoch": 55.18, "learning_rate": 8.964998118178397e-06, "loss": 0.0945, "step": 146600 }, { "epoch": 55.21, "learning_rate": 8.95747083176515e-06, "loss": 0.0936, "step": 146700 }, { "epoch": 55.25, "learning_rate": 8.949943545351902e-06, "loss": 0.095, "step": 146800 }, { "epoch": 55.29, "learning_rate": 8.942416258938653e-06, "loss": 0.094, "step": 146900 }, { "epoch": 55.33, "learning_rate": 8.934888972525405e-06, "loss": 0.0944, "step": 147000 }, { "epoch": 55.36, "learning_rate": 8.927361686112158e-06, "loss": 0.0947, "step": 147100 }, { "epoch": 55.4, "learning_rate": 8.919834399698909e-06, "loss": 0.0966, "step": 147200 }, { "epoch": 55.44, "learning_rate": 8.912307113285661e-06, "loss": 0.0933, "step": 147300 }, { "epoch": 55.48, "learning_rate": 8.904779826872414e-06, "loss": 0.0939, "step": 147400 }, { "epoch": 55.51, "learning_rate": 8.897252540459164e-06, "loss": 0.0953, "step": 147500 }, { "epoch": 55.55, "learning_rate": 8.889725254045917e-06, "loss": 0.0963, "step": 147600 }, { "epoch": 55.59, "learning_rate": 8.88219796763267e-06, "loss": 0.0947, "step": 147700 }, { "epoch": 55.63, "learning_rate": 8.874670681219422e-06, "loss": 0.0933, "step": 147800 }, { "epoch": 55.66, "learning_rate": 8.867143394806173e-06, "loss": 0.0951, "step": 147900 }, { "epoch": 55.7, "learning_rate": 8.859616108392925e-06, "loss": 0.0955, "step": 148000 }, { "epoch": 55.74, "learning_rate": 8.852088821979678e-06, "loss": 0.0943, "step": 148100 }, { "epoch": 55.78, "learning_rate": 8.844561535566429e-06, "loss": 0.0942, "step": 148200 }, { "epoch": 55.81, "learning_rate": 8.837034249153181e-06, "loss": 0.0962, "step": 148300 }, { "epoch": 55.85, "learning_rate": 8.829506962739934e-06, "loss": 0.0937, "step": 148400 }, { "epoch": 55.89, "learning_rate": 8.821979676326684e-06, "loss": 0.0944, "step": 148500 }, { "epoch": 55.93, "learning_rate": 8.814452389913437e-06, "loss": 0.0937, "step": 148600 }, { "epoch": 55.97, "learning_rate": 8.80692510350019e-06, "loss": 0.0934, "step": 148700 }, { "epoch": 56.0, "eval_loss": 0.0948183611035347, "eval_runtime": 44.9888, "eval_samples_per_second": 166.708, "eval_steps_per_second": 10.425, "step": 148792 }, { "epoch": 56.0, "learning_rate": 8.79939781708694e-06, "loss": 0.0939, "step": 148800 }, { "epoch": 56.04, "learning_rate": 8.791870530673693e-06, "loss": 0.0966, "step": 148900 }, { "epoch": 56.08, "learning_rate": 8.784343244260445e-06, "loss": 0.0951, "step": 149000 }, { "epoch": 56.12, "learning_rate": 8.776815957847196e-06, "loss": 0.0955, "step": 149100 }, { "epoch": 56.15, "learning_rate": 8.769288671433949e-06, "loss": 0.0959, "step": 149200 }, { "epoch": 56.19, "learning_rate": 8.761761385020701e-06, "loss": 0.0949, "step": 149300 }, { "epoch": 56.23, "learning_rate": 8.754234098607452e-06, "loss": 0.0938, "step": 149400 }, { "epoch": 56.27, "learning_rate": 8.746706812194205e-06, "loss": 0.0941, "step": 149500 }, { "epoch": 56.3, "learning_rate": 8.739179525780957e-06, "loss": 0.0939, "step": 149600 }, { "epoch": 56.34, "learning_rate": 8.731652239367708e-06, "loss": 0.0944, "step": 149700 }, { "epoch": 56.38, "learning_rate": 8.72412495295446e-06, "loss": 0.0949, "step": 149800 }, { "epoch": 56.42, "learning_rate": 8.716597666541213e-06, "loss": 0.0952, "step": 149900 }, { "epoch": 56.45, "learning_rate": 8.709070380127964e-06, "loss": 0.0967, "step": 150000 }, { "epoch": 56.49, "learning_rate": 8.701543093714716e-06, "loss": 0.0948, "step": 150100 }, { "epoch": 56.53, "learning_rate": 8.694015807301469e-06, "loss": 0.0942, "step": 150200 }, { "epoch": 56.57, "learning_rate": 8.686488520888221e-06, "loss": 0.0948, "step": 150300 }, { "epoch": 56.61, "learning_rate": 8.678961234474974e-06, "loss": 0.0954, "step": 150400 }, { "epoch": 56.64, "learning_rate": 8.671433948061725e-06, "loss": 0.0951, "step": 150500 }, { "epoch": 56.68, "learning_rate": 8.663906661648477e-06, "loss": 0.094, "step": 150600 }, { "epoch": 56.72, "learning_rate": 8.65637937523523e-06, "loss": 0.094, "step": 150700 }, { "epoch": 56.76, "learning_rate": 8.64885208882198e-06, "loss": 0.0947, "step": 150800 }, { "epoch": 56.79, "learning_rate": 8.641324802408733e-06, "loss": 0.0954, "step": 150900 }, { "epoch": 56.83, "learning_rate": 8.633797515995484e-06, "loss": 0.0941, "step": 151000 }, { "epoch": 56.87, "learning_rate": 8.626270229582236e-06, "loss": 0.0948, "step": 151100 }, { "epoch": 56.91, "learning_rate": 8.618742943168989e-06, "loss": 0.0934, "step": 151200 }, { "epoch": 56.94, "learning_rate": 8.61121565675574e-06, "loss": 0.0927, "step": 151300 }, { "epoch": 56.98, "learning_rate": 8.603688370342492e-06, "loss": 0.0965, "step": 151400 }, { "epoch": 57.0, "eval_loss": 0.09426940232515335, "eval_runtime": 45.2417, "eval_samples_per_second": 165.776, "eval_steps_per_second": 10.367, "step": 151449 }, { "epoch": 57.02, "learning_rate": 8.596161083929245e-06, "loss": 0.0941, "step": 151500 }, { "epoch": 57.06, "learning_rate": 8.588633797515995e-06, "loss": 0.0947, "step": 151600 }, { "epoch": 57.09, "learning_rate": 8.581106511102748e-06, "loss": 0.0932, "step": 151700 }, { "epoch": 57.13, "learning_rate": 8.5735792246895e-06, "loss": 0.0947, "step": 151800 }, { "epoch": 57.17, "learning_rate": 8.566051938276251e-06, "loss": 0.0954, "step": 151900 }, { "epoch": 57.21, "learning_rate": 8.558524651863004e-06, "loss": 0.0956, "step": 152000 }, { "epoch": 57.25, "learning_rate": 8.550997365449756e-06, "loss": 0.0939, "step": 152100 }, { "epoch": 57.28, "learning_rate": 8.543470079036507e-06, "loss": 0.0943, "step": 152200 }, { "epoch": 57.32, "learning_rate": 8.53594279262326e-06, "loss": 0.0926, "step": 152300 }, { "epoch": 57.36, "learning_rate": 8.528415506210012e-06, "loss": 0.0936, "step": 152400 }, { "epoch": 57.4, "learning_rate": 8.520888219796763e-06, "loss": 0.0942, "step": 152500 }, { "epoch": 57.43, "learning_rate": 8.513360933383515e-06, "loss": 0.0955, "step": 152600 }, { "epoch": 57.47, "learning_rate": 8.505833646970268e-06, "loss": 0.0955, "step": 152700 }, { "epoch": 57.51, "learning_rate": 8.49830636055702e-06, "loss": 0.0946, "step": 152800 }, { "epoch": 57.55, "learning_rate": 8.490779074143773e-06, "loss": 0.0964, "step": 152900 }, { "epoch": 57.58, "learning_rate": 8.483251787730524e-06, "loss": 0.094, "step": 153000 }, { "epoch": 57.62, "learning_rate": 8.475724501317276e-06, "loss": 0.0949, "step": 153100 }, { "epoch": 57.66, "learning_rate": 8.468197214904029e-06, "loss": 0.0947, "step": 153200 }, { "epoch": 57.7, "learning_rate": 8.46066992849078e-06, "loss": 0.0952, "step": 153300 }, { "epoch": 57.73, "learning_rate": 8.453142642077532e-06, "loss": 0.0939, "step": 153400 }, { "epoch": 57.77, "learning_rate": 8.445615355664285e-06, "loss": 0.0947, "step": 153500 }, { "epoch": 57.81, "learning_rate": 8.438088069251036e-06, "loss": 0.094, "step": 153600 }, { "epoch": 57.85, "learning_rate": 8.430560782837788e-06, "loss": 0.0922, "step": 153700 }, { "epoch": 57.88, "learning_rate": 8.423033496424539e-06, "loss": 0.0947, "step": 153800 }, { "epoch": 57.92, "learning_rate": 8.415506210011291e-06, "loss": 0.0948, "step": 153900 }, { "epoch": 57.96, "learning_rate": 8.407978923598044e-06, "loss": 0.0921, "step": 154000 }, { "epoch": 58.0, "learning_rate": 8.400451637184795e-06, "loss": 0.0966, "step": 154100 }, { "epoch": 58.0, "eval_loss": 0.094062440097332, "eval_runtime": 45.4411, "eval_samples_per_second": 165.049, "eval_steps_per_second": 10.321, "step": 154106 }, { "epoch": 58.04, "learning_rate": 8.392924350771547e-06, "loss": 0.0938, "step": 154200 }, { "epoch": 58.07, "learning_rate": 8.3853970643583e-06, "loss": 0.0938, "step": 154300 }, { "epoch": 58.11, "learning_rate": 8.37786977794505e-06, "loss": 0.0936, "step": 154400 }, { "epoch": 58.15, "learning_rate": 8.370342491531803e-06, "loss": 0.0942, "step": 154500 }, { "epoch": 58.19, "learning_rate": 8.362815205118556e-06, "loss": 0.0961, "step": 154600 }, { "epoch": 58.22, "learning_rate": 8.355287918705306e-06, "loss": 0.0945, "step": 154700 }, { "epoch": 58.26, "learning_rate": 8.347760632292059e-06, "loss": 0.0954, "step": 154800 }, { "epoch": 58.3, "learning_rate": 8.340233345878811e-06, "loss": 0.0942, "step": 154900 }, { "epoch": 58.34, "learning_rate": 8.332706059465562e-06, "loss": 0.095, "step": 155000 }, { "epoch": 58.37, "learning_rate": 8.325178773052315e-06, "loss": 0.0945, "step": 155100 }, { "epoch": 58.41, "learning_rate": 8.317651486639067e-06, "loss": 0.0937, "step": 155200 }, { "epoch": 58.45, "learning_rate": 8.31012420022582e-06, "loss": 0.0956, "step": 155300 }, { "epoch": 58.49, "learning_rate": 8.302596913812572e-06, "loss": 0.0926, "step": 155400 }, { "epoch": 58.52, "learning_rate": 8.295069627399323e-06, "loss": 0.0936, "step": 155500 }, { "epoch": 58.56, "learning_rate": 8.287542340986076e-06, "loss": 0.095, "step": 155600 }, { "epoch": 58.6, "learning_rate": 8.280015054572828e-06, "loss": 0.0933, "step": 155700 }, { "epoch": 58.64, "learning_rate": 8.272487768159579e-06, "loss": 0.0941, "step": 155800 }, { "epoch": 58.68, "learning_rate": 8.264960481746331e-06, "loss": 0.0937, "step": 155900 }, { "epoch": 58.71, "learning_rate": 8.257433195333084e-06, "loss": 0.0948, "step": 156000 }, { "epoch": 58.75, "learning_rate": 8.249905908919835e-06, "loss": 0.0947, "step": 156100 }, { "epoch": 58.79, "learning_rate": 8.242378622506587e-06, "loss": 0.0919, "step": 156200 }, { "epoch": 58.83, "learning_rate": 8.23485133609334e-06, "loss": 0.0956, "step": 156300 }, { "epoch": 58.86, "learning_rate": 8.22732404968009e-06, "loss": 0.0946, "step": 156400 }, { "epoch": 58.9, "learning_rate": 8.219796763266843e-06, "loss": 0.0934, "step": 156500 }, { "epoch": 58.94, "learning_rate": 8.212269476853596e-06, "loss": 0.0953, "step": 156600 }, { "epoch": 58.98, "learning_rate": 8.204742190440347e-06, "loss": 0.0926, "step": 156700 }, { "epoch": 59.0, "eval_loss": 0.0938277319073677, "eval_runtime": 45.0217, "eval_samples_per_second": 166.586, "eval_steps_per_second": 10.417, "step": 156763 }, { "epoch": 59.01, "learning_rate": 8.197214904027099e-06, "loss": 0.0939, "step": 156800 }, { "epoch": 59.05, "learning_rate": 8.18968761761385e-06, "loss": 0.0931, "step": 156900 }, { "epoch": 59.09, "learning_rate": 8.182160331200602e-06, "loss": 0.0945, "step": 157000 }, { "epoch": 59.13, "learning_rate": 8.174633044787355e-06, "loss": 0.0941, "step": 157100 }, { "epoch": 59.16, "learning_rate": 8.167105758374106e-06, "loss": 0.0937, "step": 157200 }, { "epoch": 59.2, "learning_rate": 8.159578471960858e-06, "loss": 0.0945, "step": 157300 }, { "epoch": 59.24, "learning_rate": 8.15205118554761e-06, "loss": 0.0949, "step": 157400 }, { "epoch": 59.28, "learning_rate": 8.144523899134362e-06, "loss": 0.0926, "step": 157500 }, { "epoch": 59.32, "learning_rate": 8.136996612721114e-06, "loss": 0.0943, "step": 157600 }, { "epoch": 59.35, "learning_rate": 8.129469326307867e-06, "loss": 0.0929, "step": 157700 }, { "epoch": 59.39, "learning_rate": 8.121942039894619e-06, "loss": 0.0964, "step": 157800 }, { "epoch": 59.43, "learning_rate": 8.114414753481372e-06, "loss": 0.0944, "step": 157900 }, { "epoch": 59.47, "learning_rate": 8.106887467068122e-06, "loss": 0.0943, "step": 158000 }, { "epoch": 59.5, "learning_rate": 8.099360180654875e-06, "loss": 0.0962, "step": 158100 }, { "epoch": 59.54, "learning_rate": 8.091832894241627e-06, "loss": 0.0948, "step": 158200 }, { "epoch": 59.58, "learning_rate": 8.084305607828378e-06, "loss": 0.095, "step": 158300 }, { "epoch": 59.62, "learning_rate": 8.07677832141513e-06, "loss": 0.0941, "step": 158400 }, { "epoch": 59.65, "learning_rate": 8.069251035001883e-06, "loss": 0.0949, "step": 158500 }, { "epoch": 59.69, "learning_rate": 8.061723748588634e-06, "loss": 0.0945, "step": 158600 }, { "epoch": 59.73, "learning_rate": 8.054196462175387e-06, "loss": 0.0946, "step": 158700 }, { "epoch": 59.77, "learning_rate": 8.046669175762139e-06, "loss": 0.0948, "step": 158800 }, { "epoch": 59.8, "learning_rate": 8.03914188934889e-06, "loss": 0.0946, "step": 158900 }, { "epoch": 59.84, "learning_rate": 8.031614602935642e-06, "loss": 0.0945, "step": 159000 }, { "epoch": 59.88, "learning_rate": 8.024087316522395e-06, "loss": 0.0944, "step": 159100 }, { "epoch": 59.92, "learning_rate": 8.016560030109146e-06, "loss": 0.0942, "step": 159200 }, { "epoch": 59.95, "learning_rate": 8.009032743695898e-06, "loss": 0.0937, "step": 159300 }, { "epoch": 59.99, "learning_rate": 8.00150545728265e-06, "loss": 0.0928, "step": 159400 }, { "epoch": 60.0, "eval_loss": 0.09416601806879044, "eval_runtime": 45.0453, "eval_samples_per_second": 166.499, "eval_steps_per_second": 10.412, "step": 159420 }, { "epoch": 60.03, "learning_rate": 7.993978170869402e-06, "loss": 0.0923, "step": 159500 }, { "epoch": 60.07, "learning_rate": 7.986450884456154e-06, "loss": 0.0956, "step": 159600 }, { "epoch": 60.11, "learning_rate": 7.978923598042907e-06, "loss": 0.0954, "step": 159700 }, { "epoch": 60.14, "learning_rate": 7.971396311629657e-06, "loss": 0.0946, "step": 159800 }, { "epoch": 60.18, "learning_rate": 7.96386902521641e-06, "loss": 0.0952, "step": 159900 }, { "epoch": 60.22, "learning_rate": 7.95634173880316e-06, "loss": 0.0932, "step": 160000 }, { "epoch": 60.26, "learning_rate": 7.948814452389913e-06, "loss": 0.0942, "step": 160100 }, { "epoch": 60.29, "learning_rate": 7.941287165976666e-06, "loss": 0.0923, "step": 160200 }, { "epoch": 60.33, "learning_rate": 7.933759879563418e-06, "loss": 0.0943, "step": 160300 }, { "epoch": 60.37, "learning_rate": 7.926232593150171e-06, "loss": 0.0926, "step": 160400 }, { "epoch": 60.41, "learning_rate": 7.918705306736922e-06, "loss": 0.0944, "step": 160500 }, { "epoch": 60.44, "learning_rate": 7.911178020323674e-06, "loss": 0.0951, "step": 160600 }, { "epoch": 60.48, "learning_rate": 7.903650733910427e-06, "loss": 0.0925, "step": 160700 }, { "epoch": 60.52, "learning_rate": 7.896123447497178e-06, "loss": 0.0938, "step": 160800 }, { "epoch": 60.56, "learning_rate": 7.88859616108393e-06, "loss": 0.0949, "step": 160900 }, { "epoch": 60.59, "learning_rate": 7.881068874670683e-06, "loss": 0.0924, "step": 161000 }, { "epoch": 60.63, "learning_rate": 7.873541588257433e-06, "loss": 0.0945, "step": 161100 }, { "epoch": 60.67, "learning_rate": 7.866014301844186e-06, "loss": 0.0935, "step": 161200 }, { "epoch": 60.71, "learning_rate": 7.858487015430938e-06, "loss": 0.095, "step": 161300 }, { "epoch": 60.75, "learning_rate": 7.85095972901769e-06, "loss": 0.0938, "step": 161400 }, { "epoch": 60.78, "learning_rate": 7.843432442604442e-06, "loss": 0.0937, "step": 161500 }, { "epoch": 60.82, "learning_rate": 7.835905156191194e-06, "loss": 0.0942, "step": 161600 }, { "epoch": 60.86, "learning_rate": 7.828377869777945e-06, "loss": 0.0955, "step": 161700 }, { "epoch": 60.9, "learning_rate": 7.820850583364698e-06, "loss": 0.0932, "step": 161800 }, { "epoch": 60.93, "learning_rate": 7.81332329695145e-06, "loss": 0.094, "step": 161900 }, { "epoch": 60.97, "learning_rate": 7.805796010538201e-06, "loss": 0.093, "step": 162000 }, { "epoch": 61.0, "eval_loss": 0.09355577826499939, "eval_runtime": 45.0615, "eval_samples_per_second": 166.439, "eval_steps_per_second": 10.408, "step": 162077 }, { "epoch": 61.01, "learning_rate": 7.798268724124953e-06, "loss": 0.0932, "step": 162100 }, { "epoch": 61.05, "learning_rate": 7.790741437711706e-06, "loss": 0.0942, "step": 162200 }, { "epoch": 61.08, "learning_rate": 7.783214151298457e-06, "loss": 0.0933, "step": 162300 }, { "epoch": 61.12, "learning_rate": 7.77568686488521e-06, "loss": 0.0942, "step": 162400 }, { "epoch": 61.16, "learning_rate": 7.768159578471962e-06, "loss": 0.0938, "step": 162500 }, { "epoch": 61.2, "learning_rate": 7.760632292058713e-06, "loss": 0.0948, "step": 162600 }, { "epoch": 61.23, "learning_rate": 7.753105005645465e-06, "loss": 0.0946, "step": 162700 }, { "epoch": 61.27, "learning_rate": 7.745577719232218e-06, "loss": 0.0926, "step": 162800 }, { "epoch": 61.31, "learning_rate": 7.73805043281897e-06, "loss": 0.0945, "step": 162900 }, { "epoch": 61.35, "learning_rate": 7.730523146405721e-06, "loss": 0.0923, "step": 163000 }, { "epoch": 61.39, "learning_rate": 7.722995859992473e-06, "loss": 0.0935, "step": 163100 }, { "epoch": 61.42, "learning_rate": 7.715468573579226e-06, "loss": 0.0938, "step": 163200 }, { "epoch": 61.46, "learning_rate": 7.707941287165977e-06, "loss": 0.0938, "step": 163300 }, { "epoch": 61.5, "learning_rate": 7.70041400075273e-06, "loss": 0.0929, "step": 163400 }, { "epoch": 61.54, "learning_rate": 7.692886714339482e-06, "loss": 0.0937, "step": 163500 }, { "epoch": 61.57, "learning_rate": 7.685359427926233e-06, "loss": 0.0921, "step": 163600 }, { "epoch": 61.61, "learning_rate": 7.677832141512985e-06, "loss": 0.0933, "step": 163700 }, { "epoch": 61.65, "learning_rate": 7.670304855099738e-06, "loss": 0.0929, "step": 163800 }, { "epoch": 61.69, "learning_rate": 7.662777568686489e-06, "loss": 0.0931, "step": 163900 }, { "epoch": 61.72, "learning_rate": 7.655250282273241e-06, "loss": 0.0947, "step": 164000 }, { "epoch": 61.76, "learning_rate": 7.647722995859994e-06, "loss": 0.0944, "step": 164100 }, { "epoch": 61.8, "learning_rate": 7.640195709446744e-06, "loss": 0.0929, "step": 164200 }, { "epoch": 61.84, "learning_rate": 7.632668423033497e-06, "loss": 0.0945, "step": 164300 }, { "epoch": 61.87, "learning_rate": 7.6251411366202485e-06, "loss": 0.0952, "step": 164400 }, { "epoch": 61.91, "learning_rate": 7.617613850207001e-06, "loss": 0.0939, "step": 164500 }, { "epoch": 61.95, "learning_rate": 7.610086563793753e-06, "loss": 0.0949, "step": 164600 }, { "epoch": 61.99, "learning_rate": 7.602559277380504e-06, "loss": 0.0939, "step": 164700 }, { "epoch": 62.0, "eval_loss": 0.09392710030078888, "eval_runtime": 45.1193, "eval_samples_per_second": 166.226, "eval_steps_per_second": 10.395, "step": 164734 }, { "epoch": 62.02, "learning_rate": 7.595031990967257e-06, "loss": 0.0937, "step": 164800 }, { "epoch": 62.06, "learning_rate": 7.5875047045540086e-06, "loss": 0.0934, "step": 164900 }, { "epoch": 62.1, "learning_rate": 7.57997741814076e-06, "loss": 0.094, "step": 165000 }, { "epoch": 62.14, "learning_rate": 7.572450131727512e-06, "loss": 0.0947, "step": 165100 }, { "epoch": 62.18, "learning_rate": 7.564922845314265e-06, "loss": 0.0926, "step": 165200 }, { "epoch": 62.21, "learning_rate": 7.557395558901017e-06, "loss": 0.0947, "step": 165300 }, { "epoch": 62.25, "learning_rate": 7.5498682724877694e-06, "loss": 0.0937, "step": 165400 }, { "epoch": 62.29, "learning_rate": 7.542340986074521e-06, "loss": 0.0914, "step": 165500 }, { "epoch": 62.33, "learning_rate": 7.534813699661273e-06, "loss": 0.0953, "step": 165600 }, { "epoch": 62.36, "learning_rate": 7.527286413248025e-06, "loss": 0.0952, "step": 165700 }, { "epoch": 62.4, "learning_rate": 7.519759126834777e-06, "loss": 0.0935, "step": 165800 }, { "epoch": 62.44, "learning_rate": 7.512231840421529e-06, "loss": 0.0947, "step": 165900 }, { "epoch": 62.48, "learning_rate": 7.504704554008281e-06, "loss": 0.0935, "step": 166000 }, { "epoch": 62.51, "learning_rate": 7.497177267595033e-06, "loss": 0.0957, "step": 166100 }, { "epoch": 62.55, "learning_rate": 7.4896499811817845e-06, "loss": 0.0926, "step": 166200 }, { "epoch": 62.59, "learning_rate": 7.482122694768537e-06, "loss": 0.0943, "step": 166300 }, { "epoch": 62.63, "learning_rate": 7.474595408355289e-06, "loss": 0.0938, "step": 166400 }, { "epoch": 62.66, "learning_rate": 7.46706812194204e-06, "loss": 0.0943, "step": 166500 }, { "epoch": 62.7, "learning_rate": 7.459540835528792e-06, "loss": 0.0924, "step": 166600 }, { "epoch": 62.74, "learning_rate": 7.4520135491155445e-06, "loss": 0.0929, "step": 166700 }, { "epoch": 62.78, "learning_rate": 7.444486262702296e-06, "loss": 0.0936, "step": 166800 }, { "epoch": 62.82, "learning_rate": 7.436958976289048e-06, "loss": 0.0931, "step": 166900 }, { "epoch": 62.85, "learning_rate": 7.4294316898758e-06, "loss": 0.0943, "step": 167000 }, { "epoch": 62.89, "learning_rate": 7.421904403462552e-06, "loss": 0.094, "step": 167100 }, { "epoch": 62.93, "learning_rate": 7.414377117049304e-06, "loss": 0.0931, "step": 167200 }, { "epoch": 62.97, "learning_rate": 7.406849830636056e-06, "loss": 0.0936, "step": 167300 }, { "epoch": 63.0, "eval_loss": 0.09357059001922607, "eval_runtime": 45.0624, "eval_samples_per_second": 166.436, "eval_steps_per_second": 10.408, "step": 167391 }, { "epoch": 63.0, "learning_rate": 7.399322544222808e-06, "loss": 0.0936, "step": 167400 }, { "epoch": 63.04, "learning_rate": 7.3917952578095595e-06, "loss": 0.094, "step": 167500 }, { "epoch": 63.08, "learning_rate": 7.384267971396312e-06, "loss": 0.0946, "step": 167600 }, { "epoch": 63.12, "learning_rate": 7.3767406849830646e-06, "loss": 0.0941, "step": 167700 }, { "epoch": 63.15, "learning_rate": 7.369213398569817e-06, "loss": 0.0943, "step": 167800 }, { "epoch": 63.19, "learning_rate": 7.361686112156569e-06, "loss": 0.0949, "step": 167900 }, { "epoch": 63.23, "learning_rate": 7.35415882574332e-06, "loss": 0.0945, "step": 168000 }, { "epoch": 63.27, "learning_rate": 7.346631539330072e-06, "loss": 0.0924, "step": 168100 }, { "epoch": 63.3, "learning_rate": 7.3391042529168246e-06, "loss": 0.094, "step": 168200 }, { "epoch": 63.34, "learning_rate": 7.331576966503576e-06, "loss": 0.0955, "step": 168300 }, { "epoch": 63.38, "learning_rate": 7.324049680090328e-06, "loss": 0.0937, "step": 168400 }, { "epoch": 63.42, "learning_rate": 7.31652239367708e-06, "loss": 0.0924, "step": 168500 }, { "epoch": 63.46, "learning_rate": 7.308995107263832e-06, "loss": 0.0943, "step": 168600 }, { "epoch": 63.49, "learning_rate": 7.301467820850584e-06, "loss": 0.094, "step": 168700 }, { "epoch": 63.53, "learning_rate": 7.293940534437336e-06, "loss": 0.0916, "step": 168800 }, { "epoch": 63.57, "learning_rate": 7.286413248024088e-06, "loss": 0.092, "step": 168900 }, { "epoch": 63.61, "learning_rate": 7.27888596161084e-06, "loss": 0.0951, "step": 169000 }, { "epoch": 63.64, "learning_rate": 7.271358675197592e-06, "loss": 0.0928, "step": 169100 }, { "epoch": 63.68, "learning_rate": 7.263831388784344e-06, "loss": 0.0938, "step": 169200 }, { "epoch": 63.72, "learning_rate": 7.2563041023710954e-06, "loss": 0.0936, "step": 169300 }, { "epoch": 63.76, "learning_rate": 7.248776815957848e-06, "loss": 0.0928, "step": 169400 }, { "epoch": 63.79, "learning_rate": 7.2412495295446e-06, "loss": 0.0944, "step": 169500 }, { "epoch": 63.83, "learning_rate": 7.233722243131351e-06, "loss": 0.0925, "step": 169600 }, { "epoch": 63.87, "learning_rate": 7.226194956718103e-06, "loss": 0.0932, "step": 169700 }, { "epoch": 63.91, "learning_rate": 7.2186676703048555e-06, "loss": 0.0934, "step": 169800 }, { "epoch": 63.94, "learning_rate": 7.211140383891607e-06, "loss": 0.0927, "step": 169900 }, { "epoch": 63.98, "learning_rate": 7.203613097478359e-06, "loss": 0.093, "step": 170000 }, { "epoch": 64.0, "eval_loss": 0.09292689710855484, "eval_runtime": 45.1577, "eval_samples_per_second": 166.085, "eval_steps_per_second": 10.386, "step": 170048 }, { "epoch": 64.02, "learning_rate": 7.196085811065112e-06, "loss": 0.0933, "step": 170100 }, { "epoch": 64.06, "learning_rate": 7.188558524651864e-06, "loss": 0.0938, "step": 170200 }, { "epoch": 64.09, "learning_rate": 7.181031238238616e-06, "loss": 0.0913, "step": 170300 }, { "epoch": 64.13, "learning_rate": 7.173503951825368e-06, "loss": 0.0919, "step": 170400 }, { "epoch": 64.17, "learning_rate": 7.16597666541212e-06, "loss": 0.0949, "step": 170500 }, { "epoch": 64.21, "learning_rate": 7.158449378998872e-06, "loss": 0.0938, "step": 170600 }, { "epoch": 64.25, "learning_rate": 7.150922092585624e-06, "loss": 0.0948, "step": 170700 }, { "epoch": 64.28, "learning_rate": 7.1433948061723755e-06, "loss": 0.093, "step": 170800 }, { "epoch": 64.32, "learning_rate": 7.135867519759127e-06, "loss": 0.0933, "step": 170900 }, { "epoch": 64.36, "learning_rate": 7.12834023334588e-06, "loss": 0.0915, "step": 171000 }, { "epoch": 64.4, "learning_rate": 7.120812946932631e-06, "loss": 0.093, "step": 171100 }, { "epoch": 64.43, "learning_rate": 7.113285660519383e-06, "loss": 0.0933, "step": 171200 }, { "epoch": 64.47, "learning_rate": 7.1057583741061356e-06, "loss": 0.0936, "step": 171300 }, { "epoch": 64.51, "learning_rate": 7.098231087692887e-06, "loss": 0.0935, "step": 171400 }, { "epoch": 64.55, "learning_rate": 7.090703801279639e-06, "loss": 0.094, "step": 171500 }, { "epoch": 64.58, "learning_rate": 7.083176514866391e-06, "loss": 0.0941, "step": 171600 }, { "epoch": 64.62, "learning_rate": 7.075649228453143e-06, "loss": 0.0946, "step": 171700 }, { "epoch": 64.66, "learning_rate": 7.068121942039895e-06, "loss": 0.0946, "step": 171800 }, { "epoch": 64.7, "learning_rate": 7.060594655626647e-06, "loss": 0.0926, "step": 171900 }, { "epoch": 64.73, "learning_rate": 7.053067369213399e-06, "loss": 0.0946, "step": 172000 }, { "epoch": 64.77, "learning_rate": 7.045540082800151e-06, "loss": 0.0937, "step": 172100 }, { "epoch": 64.81, "learning_rate": 7.038012796386903e-06, "loss": 0.0927, "step": 172200 }, { "epoch": 64.85, "learning_rate": 7.030485509973655e-06, "loss": 0.0945, "step": 172300 }, { "epoch": 64.89, "learning_rate": 7.022958223560406e-06, "loss": 0.0918, "step": 172400 }, { "epoch": 64.92, "learning_rate": 7.015430937147158e-06, "loss": 0.0923, "step": 172500 }, { "epoch": 64.96, "learning_rate": 7.0079036507339114e-06, "loss": 0.0926, "step": 172600 }, { "epoch": 65.0, "learning_rate": 7.000376364320663e-06, "loss": 0.0929, "step": 172700 }, { "epoch": 65.0, "eval_loss": 0.0930134728550911, "eval_runtime": 44.9287, "eval_samples_per_second": 166.931, "eval_steps_per_second": 10.439, "step": 172705 }, { "epoch": 65.04, "learning_rate": 6.992849077907416e-06, "loss": 0.0929, "step": 172800 }, { "epoch": 65.07, "learning_rate": 6.985321791494167e-06, "loss": 0.0932, "step": 172900 }, { "epoch": 65.11, "learning_rate": 6.977794505080919e-06, "loss": 0.0948, "step": 173000 }, { "epoch": 65.15, "learning_rate": 6.9702672186676715e-06, "loss": 0.093, "step": 173100 }, { "epoch": 65.19, "learning_rate": 6.962739932254423e-06, "loss": 0.0947, "step": 173200 }, { "epoch": 65.22, "learning_rate": 6.955212645841175e-06, "loss": 0.0925, "step": 173300 }, { "epoch": 65.26, "learning_rate": 6.947685359427927e-06, "loss": 0.093, "step": 173400 }, { "epoch": 65.3, "learning_rate": 6.940158073014679e-06, "loss": 0.0931, "step": 173500 }, { "epoch": 65.34, "learning_rate": 6.932630786601431e-06, "loss": 0.0944, "step": 173600 }, { "epoch": 65.37, "learning_rate": 6.925103500188183e-06, "loss": 0.0923, "step": 173700 }, { "epoch": 65.41, "learning_rate": 6.917576213774935e-06, "loss": 0.0919, "step": 173800 }, { "epoch": 65.45, "learning_rate": 6.9100489273616865e-06, "loss": 0.0945, "step": 173900 }, { "epoch": 65.49, "learning_rate": 6.902521640948438e-06, "loss": 0.0924, "step": 174000 }, { "epoch": 65.53, "learning_rate": 6.894994354535191e-06, "loss": 0.0941, "step": 174100 }, { "epoch": 65.56, "learning_rate": 6.887467068121942e-06, "loss": 0.0918, "step": 174200 }, { "epoch": 65.6, "learning_rate": 6.879939781708694e-06, "loss": 0.0935, "step": 174300 }, { "epoch": 65.64, "learning_rate": 6.8724124952954465e-06, "loss": 0.0944, "step": 174400 }, { "epoch": 65.68, "learning_rate": 6.864885208882198e-06, "loss": 0.0924, "step": 174500 }, { "epoch": 65.71, "learning_rate": 6.85735792246895e-06, "loss": 0.0927, "step": 174600 }, { "epoch": 65.75, "learning_rate": 6.849830636055702e-06, "loss": 0.094, "step": 174700 }, { "epoch": 65.79, "learning_rate": 6.842303349642454e-06, "loss": 0.0935, "step": 174800 }, { "epoch": 65.83, "learning_rate": 6.834776063229206e-06, "loss": 0.0927, "step": 174900 }, { "epoch": 65.86, "learning_rate": 6.827248776815958e-06, "loss": 0.0937, "step": 175000 }, { "epoch": 65.9, "learning_rate": 6.819721490402711e-06, "loss": 0.0938, "step": 175100 }, { "epoch": 65.94, "learning_rate": 6.812194203989463e-06, "loss": 0.0931, "step": 175200 }, { "epoch": 65.98, "learning_rate": 6.804666917576215e-06, "loss": 0.0917, "step": 175300 }, { "epoch": 66.0, "eval_loss": 0.09251850843429565, "eval_runtime": 44.9106, "eval_samples_per_second": 166.998, "eval_steps_per_second": 10.443, "step": 175362 }, { "epoch": 66.01, "learning_rate": 6.797139631162967e-06, "loss": 0.0936, "step": 175400 }, { "epoch": 66.05, "learning_rate": 6.789612344749718e-06, "loss": 0.0948, "step": 175500 }, { "epoch": 66.09, "learning_rate": 6.782085058336471e-06, "loss": 0.0945, "step": 175600 }, { "epoch": 66.13, "learning_rate": 6.7745577719232224e-06, "loss": 0.0937, "step": 175700 }, { "epoch": 66.16, "learning_rate": 6.767030485509974e-06, "loss": 0.0945, "step": 175800 }, { "epoch": 66.2, "learning_rate": 6.759503199096727e-06, "loss": 0.0932, "step": 175900 }, { "epoch": 66.24, "learning_rate": 6.751975912683478e-06, "loss": 0.0936, "step": 176000 }, { "epoch": 66.28, "learning_rate": 6.74444862627023e-06, "loss": 0.0933, "step": 176100 }, { "epoch": 66.32, "learning_rate": 6.7369213398569825e-06, "loss": 0.0926, "step": 176200 }, { "epoch": 66.35, "learning_rate": 6.729394053443734e-06, "loss": 0.093, "step": 176300 }, { "epoch": 66.39, "learning_rate": 6.721866767030486e-06, "loss": 0.0929, "step": 176400 }, { "epoch": 66.43, "learning_rate": 6.714339480617238e-06, "loss": 0.0934, "step": 176500 }, { "epoch": 66.47, "learning_rate": 6.70681219420399e-06, "loss": 0.0936, "step": 176600 }, { "epoch": 66.5, "learning_rate": 6.699284907790742e-06, "loss": 0.0916, "step": 176700 }, { "epoch": 66.54, "learning_rate": 6.691757621377494e-06, "loss": 0.0921, "step": 176800 }, { "epoch": 66.58, "learning_rate": 6.684230334964246e-06, "loss": 0.094, "step": 176900 }, { "epoch": 66.62, "learning_rate": 6.6767030485509975e-06, "loss": 0.0915, "step": 177000 }, { "epoch": 66.65, "learning_rate": 6.669175762137749e-06, "loss": 0.0919, "step": 177100 }, { "epoch": 66.69, "learning_rate": 6.661648475724502e-06, "loss": 0.0936, "step": 177200 }, { "epoch": 66.73, "learning_rate": 6.654121189311253e-06, "loss": 0.0927, "step": 177300 }, { "epoch": 66.77, "learning_rate": 6.646593902898005e-06, "loss": 0.0921, "step": 177400 }, { "epoch": 66.8, "learning_rate": 6.6390666164847575e-06, "loss": 0.0929, "step": 177500 }, { "epoch": 66.84, "learning_rate": 6.63153933007151e-06, "loss": 0.0934, "step": 177600 }, { "epoch": 66.88, "learning_rate": 6.6240120436582625e-06, "loss": 0.0932, "step": 177700 }, { "epoch": 66.92, "learning_rate": 6.616484757245014e-06, "loss": 0.0944, "step": 177800 }, { "epoch": 66.96, "learning_rate": 6.608957470831766e-06, "loss": 0.092, "step": 177900 }, { "epoch": 66.99, "learning_rate": 6.601430184418518e-06, "loss": 0.0948, "step": 178000 }, { "epoch": 67.0, "eval_loss": 0.09316383302211761, "eval_runtime": 44.8531, "eval_samples_per_second": 167.212, "eval_steps_per_second": 10.456, "step": 178019 }, { "epoch": 67.03, "learning_rate": 6.59390289800527e-06, "loss": 0.0931, "step": 178100 }, { "epoch": 67.07, "learning_rate": 6.586375611592022e-06, "loss": 0.0929, "step": 178200 }, { "epoch": 67.11, "learning_rate": 6.578848325178774e-06, "loss": 0.0933, "step": 178300 }, { "epoch": 67.14, "learning_rate": 6.571321038765526e-06, "loss": 0.0909, "step": 178400 }, { "epoch": 67.18, "learning_rate": 6.5637937523522776e-06, "loss": 0.093, "step": 178500 }, { "epoch": 67.22, "learning_rate": 6.556266465939029e-06, "loss": 0.0942, "step": 178600 }, { "epoch": 67.26, "learning_rate": 6.548739179525782e-06, "loss": 0.0926, "step": 178700 }, { "epoch": 67.29, "learning_rate": 6.541211893112533e-06, "loss": 0.0921, "step": 178800 }, { "epoch": 67.33, "learning_rate": 6.533684606699285e-06, "loss": 0.0932, "step": 178900 }, { "epoch": 67.37, "learning_rate": 6.526157320286038e-06, "loss": 0.0939, "step": 179000 }, { "epoch": 67.41, "learning_rate": 6.518630033872789e-06, "loss": 0.0943, "step": 179100 }, { "epoch": 67.44, "learning_rate": 6.511102747459541e-06, "loss": 0.0925, "step": 179200 }, { "epoch": 67.48, "learning_rate": 6.5035754610462934e-06, "loss": 0.092, "step": 179300 }, { "epoch": 67.52, "learning_rate": 6.496048174633045e-06, "loss": 0.093, "step": 179400 }, { "epoch": 67.56, "learning_rate": 6.488520888219797e-06, "loss": 0.0942, "step": 179500 }, { "epoch": 67.6, "learning_rate": 6.480993601806549e-06, "loss": 0.0933, "step": 179600 }, { "epoch": 67.63, "learning_rate": 6.473466315393301e-06, "loss": 0.0921, "step": 179700 }, { "epoch": 67.67, "learning_rate": 6.465939028980053e-06, "loss": 0.0928, "step": 179800 }, { "epoch": 67.71, "learning_rate": 6.458411742566805e-06, "loss": 0.0945, "step": 179900 }, { "epoch": 67.75, "learning_rate": 6.450884456153557e-06, "loss": 0.0925, "step": 180000 }, { "epoch": 67.78, "learning_rate": 6.443357169740309e-06, "loss": 0.0912, "step": 180100 }, { "epoch": 67.82, "learning_rate": 6.435829883327062e-06, "loss": 0.0917, "step": 180200 }, { "epoch": 67.86, "learning_rate": 6.4283025969138135e-06, "loss": 0.093, "step": 180300 }, { "epoch": 67.9, "learning_rate": 6.420775310500565e-06, "loss": 0.0928, "step": 180400 }, { "epoch": 67.93, "learning_rate": 6.413248024087318e-06, "loss": 0.0915, "step": 180500 }, { "epoch": 67.97, "learning_rate": 6.405720737674069e-06, "loss": 0.0931, "step": 180600 }, { "epoch": 68.0, "eval_loss": 0.09266681969165802, "eval_runtime": 44.9069, "eval_samples_per_second": 167.012, "eval_steps_per_second": 10.444, "step": 180676 }, { "epoch": 68.01, "learning_rate": 6.398193451260821e-06, "loss": 0.0939, "step": 180700 }, { "epoch": 68.05, "learning_rate": 6.3906661648475735e-06, "loss": 0.0933, "step": 180800 }, { "epoch": 68.08, "learning_rate": 6.383138878434325e-06, "loss": 0.0922, "step": 180900 }, { "epoch": 68.12, "learning_rate": 6.375611592021077e-06, "loss": 0.0922, "step": 181000 }, { "epoch": 68.16, "learning_rate": 6.368084305607829e-06, "loss": 0.0935, "step": 181100 }, { "epoch": 68.2, "learning_rate": 6.360557019194581e-06, "loss": 0.0938, "step": 181200 }, { "epoch": 68.23, "learning_rate": 6.353029732781333e-06, "loss": 0.0929, "step": 181300 }, { "epoch": 68.27, "learning_rate": 6.345502446368084e-06, "loss": 0.093, "step": 181400 }, { "epoch": 68.31, "learning_rate": 6.337975159954837e-06, "loss": 0.0932, "step": 181500 }, { "epoch": 68.35, "learning_rate": 6.3304478735415885e-06, "loss": 0.0914, "step": 181600 }, { "epoch": 68.39, "learning_rate": 6.32292058712834e-06, "loss": 0.0926, "step": 181700 }, { "epoch": 68.42, "learning_rate": 6.315393300715093e-06, "loss": 0.0935, "step": 181800 }, { "epoch": 68.46, "learning_rate": 6.307866014301844e-06, "loss": 0.0923, "step": 181900 }, { "epoch": 68.5, "learning_rate": 6.300338727888596e-06, "loss": 0.0921, "step": 182000 }, { "epoch": 68.54, "learning_rate": 6.2928114414753486e-06, "loss": 0.0929, "step": 182100 }, { "epoch": 68.57, "learning_rate": 6.2852841550621e-06, "loss": 0.0935, "step": 182200 }, { "epoch": 68.61, "learning_rate": 6.277756868648852e-06, "loss": 0.0923, "step": 182300 }, { "epoch": 68.65, "learning_rate": 6.270229582235604e-06, "loss": 0.093, "step": 182400 }, { "epoch": 68.69, "learning_rate": 6.262702295822356e-06, "loss": 0.0912, "step": 182500 }, { "epoch": 68.72, "learning_rate": 6.2551750094091094e-06, "loss": 0.0922, "step": 182600 }, { "epoch": 68.76, "learning_rate": 6.247647722995861e-06, "loss": 0.0919, "step": 182700 }, { "epoch": 68.8, "learning_rate": 6.240120436582613e-06, "loss": 0.0933, "step": 182800 }, { "epoch": 68.84, "learning_rate": 6.2325931501693644e-06, "loss": 0.0936, "step": 182900 }, { "epoch": 68.87, "learning_rate": 6.225065863756117e-06, "loss": 0.0935, "step": 183000 }, { "epoch": 68.91, "learning_rate": 6.217538577342869e-06, "loss": 0.0917, "step": 183100 }, { "epoch": 68.95, "learning_rate": 6.21001129092962e-06, "loss": 0.0916, "step": 183200 }, { "epoch": 68.99, "learning_rate": 6.202484004516373e-06, "loss": 0.0911, "step": 183300 }, { "epoch": 69.0, "eval_loss": 0.092154860496521, "eval_runtime": 44.9385, "eval_samples_per_second": 166.895, "eval_steps_per_second": 10.436, "step": 183333 }, { "epoch": 69.03, "learning_rate": 6.1949567181031245e-06, "loss": 0.0919, "step": 183400 }, { "epoch": 69.06, "learning_rate": 6.187429431689876e-06, "loss": 0.0931, "step": 183500 }, { "epoch": 69.1, "learning_rate": 6.179902145276629e-06, "loss": 0.0923, "step": 183600 }, { "epoch": 69.14, "learning_rate": 6.17237485886338e-06, "loss": 0.0927, "step": 183700 }, { "epoch": 69.18, "learning_rate": 6.164847572450132e-06, "loss": 0.0942, "step": 183800 }, { "epoch": 69.21, "learning_rate": 6.1573202860368845e-06, "loss": 0.0926, "step": 183900 }, { "epoch": 69.25, "learning_rate": 6.149792999623636e-06, "loss": 0.0943, "step": 184000 }, { "epoch": 69.29, "learning_rate": 6.142265713210388e-06, "loss": 0.0918, "step": 184100 }, { "epoch": 69.33, "learning_rate": 6.13473842679714e-06, "loss": 0.0933, "step": 184200 }, { "epoch": 69.36, "learning_rate": 6.127211140383892e-06, "loss": 0.093, "step": 184300 }, { "epoch": 69.4, "learning_rate": 6.119683853970644e-06, "loss": 0.0927, "step": 184400 }, { "epoch": 69.44, "learning_rate": 6.112156567557395e-06, "loss": 0.0925, "step": 184500 }, { "epoch": 69.48, "learning_rate": 6.104629281144148e-06, "loss": 0.0922, "step": 184600 }, { "epoch": 69.51, "learning_rate": 6.0971019947308995e-06, "loss": 0.0926, "step": 184700 }, { "epoch": 69.55, "learning_rate": 6.089574708317651e-06, "loss": 0.0928, "step": 184800 }, { "epoch": 69.59, "learning_rate": 6.082047421904404e-06, "loss": 0.092, "step": 184900 }, { "epoch": 69.63, "learning_rate": 6.074520135491156e-06, "loss": 0.0934, "step": 185000 }, { "epoch": 69.67, "learning_rate": 6.066992849077909e-06, "loss": 0.0927, "step": 185100 }, { "epoch": 69.7, "learning_rate": 6.05946556266466e-06, "loss": 0.0923, "step": 185200 }, { "epoch": 69.74, "learning_rate": 6.051938276251412e-06, "loss": 0.0933, "step": 185300 }, { "epoch": 69.78, "learning_rate": 6.0444109898381646e-06, "loss": 0.0914, "step": 185400 }, { "epoch": 69.82, "learning_rate": 6.036883703424916e-06, "loss": 0.0928, "step": 185500 }, { "epoch": 69.85, "learning_rate": 6.029356417011668e-06, "loss": 0.0917, "step": 185600 }, { "epoch": 69.89, "learning_rate": 6.02182913059842e-06, "loss": 0.0932, "step": 185700 }, { "epoch": 69.93, "learning_rate": 6.014301844185172e-06, "loss": 0.0917, "step": 185800 }, { "epoch": 69.97, "learning_rate": 6.006774557771924e-06, "loss": 0.0923, "step": 185900 }, { "epoch": 70.0, "eval_loss": 0.0924314558506012, "eval_runtime": 45.1886, "eval_samples_per_second": 165.971, "eval_steps_per_second": 10.379, "step": 185990 }, { "epoch": 70.0, "learning_rate": 5.999247271358675e-06, "loss": 0.0933, "step": 186000 }, { "epoch": 70.04, "learning_rate": 5.991719984945428e-06, "loss": 0.0918, "step": 186100 }, { "epoch": 70.08, "learning_rate": 5.98419269853218e-06, "loss": 0.0919, "step": 186200 }, { "epoch": 70.12, "learning_rate": 5.976665412118931e-06, "loss": 0.0942, "step": 186300 }, { "epoch": 70.15, "learning_rate": 5.969138125705684e-06, "loss": 0.092, "step": 186400 }, { "epoch": 70.19, "learning_rate": 5.9616108392924354e-06, "loss": 0.0923, "step": 186500 }, { "epoch": 70.23, "learning_rate": 5.954083552879187e-06, "loss": 0.0929, "step": 186600 }, { "epoch": 70.27, "learning_rate": 5.94655626646594e-06, "loss": 0.0932, "step": 186700 }, { "epoch": 70.3, "learning_rate": 5.939028980052691e-06, "loss": 0.0936, "step": 186800 }, { "epoch": 70.34, "learning_rate": 5.931501693639443e-06, "loss": 0.0931, "step": 186900 }, { "epoch": 70.38, "learning_rate": 5.9239744072261955e-06, "loss": 0.0919, "step": 187000 }, { "epoch": 70.42, "learning_rate": 5.916447120812947e-06, "loss": 0.092, "step": 187100 }, { "epoch": 70.46, "learning_rate": 5.908919834399699e-06, "loss": 0.0949, "step": 187200 }, { "epoch": 70.49, "learning_rate": 5.901392547986451e-06, "loss": 0.0931, "step": 187300 }, { "epoch": 70.53, "learning_rate": 5.893865261573203e-06, "loss": 0.0927, "step": 187400 }, { "epoch": 70.57, "learning_rate": 5.8863379751599555e-06, "loss": 0.0918, "step": 187500 }, { "epoch": 70.61, "learning_rate": 5.878810688746708e-06, "loss": 0.0916, "step": 187600 }, { "epoch": 70.64, "learning_rate": 5.87128340233346e-06, "loss": 0.0926, "step": 187700 }, { "epoch": 70.68, "learning_rate": 5.863756115920211e-06, "loss": 0.0913, "step": 187800 }, { "epoch": 70.72, "learning_rate": 5.856228829506964e-06, "loss": 0.093, "step": 187900 }, { "epoch": 70.76, "learning_rate": 5.8487015430937155e-06, "loss": 0.091, "step": 188000 }, { "epoch": 70.79, "learning_rate": 5.841174256680467e-06, "loss": 0.0941, "step": 188100 }, { "epoch": 70.83, "learning_rate": 5.83364697026722e-06, "loss": 0.0933, "step": 188200 }, { "epoch": 70.87, "learning_rate": 5.826119683853971e-06, "loss": 0.0916, "step": 188300 }, { "epoch": 70.91, "learning_rate": 5.818592397440723e-06, "loss": 0.0946, "step": 188400 }, { "epoch": 70.94, "learning_rate": 5.8110651110274755e-06, "loss": 0.0927, "step": 188500 }, { "epoch": 70.98, "learning_rate": 5.803537824614227e-06, "loss": 0.0923, "step": 188600 }, { "epoch": 71.0, "eval_loss": 0.09234917163848877, "eval_runtime": 45.0733, "eval_samples_per_second": 166.395, "eval_steps_per_second": 10.405, "step": 188647 }, { "epoch": 71.02, "learning_rate": 5.796010538200979e-06, "loss": 0.0929, "step": 188700 }, { "epoch": 71.06, "learning_rate": 5.788483251787731e-06, "loss": 0.0928, "step": 188800 }, { "epoch": 71.1, "learning_rate": 5.780955965374483e-06, "loss": 0.0925, "step": 188900 }, { "epoch": 71.13, "learning_rate": 5.773428678961235e-06, "loss": 0.0928, "step": 189000 }, { "epoch": 71.17, "learning_rate": 5.765901392547986e-06, "loss": 0.0943, "step": 189100 }, { "epoch": 71.21, "learning_rate": 5.758374106134739e-06, "loss": 0.092, "step": 189200 }, { "epoch": 71.25, "learning_rate": 5.7508468197214906e-06, "loss": 0.0928, "step": 189300 }, { "epoch": 71.28, "learning_rate": 5.743319533308242e-06, "loss": 0.0917, "step": 189400 }, { "epoch": 71.32, "learning_rate": 5.735792246894995e-06, "loss": 0.0919, "step": 189500 }, { "epoch": 71.36, "learning_rate": 5.728264960481746e-06, "loss": 0.0922, "step": 189600 }, { "epoch": 71.4, "learning_rate": 5.720737674068498e-06, "loss": 0.0931, "step": 189700 }, { "epoch": 71.43, "learning_rate": 5.713210387655251e-06, "loss": 0.0922, "step": 189800 }, { "epoch": 71.47, "learning_rate": 5.705683101242002e-06, "loss": 0.0908, "step": 189900 }, { "epoch": 71.51, "learning_rate": 5.698155814828756e-06, "loss": 0.0909, "step": 190000 }, { "epoch": 71.55, "learning_rate": 5.690628528415507e-06, "loss": 0.0922, "step": 190100 }, { "epoch": 71.58, "learning_rate": 5.683101242002259e-06, "loss": 0.0941, "step": 190200 }, { "epoch": 71.62, "learning_rate": 5.675573955589011e-06, "loss": 0.0931, "step": 190300 }, { "epoch": 71.66, "learning_rate": 5.668046669175763e-06, "loss": 0.092, "step": 190400 }, { "epoch": 71.7, "learning_rate": 5.660519382762515e-06, "loss": 0.0903, "step": 190500 }, { "epoch": 71.74, "learning_rate": 5.6529920963492665e-06, "loss": 0.0917, "step": 190600 }, { "epoch": 71.77, "learning_rate": 5.645464809936019e-06, "loss": 0.0911, "step": 190700 }, { "epoch": 71.81, "learning_rate": 5.637937523522771e-06, "loss": 0.0924, "step": 190800 }, { "epoch": 71.85, "learning_rate": 5.630410237109522e-06, "loss": 0.0922, "step": 190900 }, { "epoch": 71.89, "learning_rate": 5.622882950696275e-06, "loss": 0.0935, "step": 191000 }, { "epoch": 71.92, "learning_rate": 5.6153556642830265e-06, "loss": 0.0912, "step": 191100 }, { "epoch": 71.96, "learning_rate": 5.607828377869778e-06, "loss": 0.0919, "step": 191200 }, { "epoch": 72.0, "learning_rate": 5.600301091456531e-06, "loss": 0.0929, "step": 191300 }, { "epoch": 72.0, "eval_loss": 0.09194895625114441, "eval_runtime": 45.1877, "eval_samples_per_second": 165.975, "eval_steps_per_second": 10.379, "step": 191304 }, { "epoch": 72.04, "learning_rate": 5.592773805043282e-06, "loss": 0.0934, "step": 191400 }, { "epoch": 72.07, "learning_rate": 5.585246518630034e-06, "loss": 0.0922, "step": 191500 }, { "epoch": 72.11, "learning_rate": 5.5777192322167865e-06, "loss": 0.0934, "step": 191600 }, { "epoch": 72.15, "learning_rate": 5.570191945803538e-06, "loss": 0.0902, "step": 191700 }, { "epoch": 72.19, "learning_rate": 5.56266465939029e-06, "loss": 0.0921, "step": 191800 }, { "epoch": 72.22, "learning_rate": 5.5551373729770415e-06, "loss": 0.0922, "step": 191900 }, { "epoch": 72.26, "learning_rate": 5.547610086563794e-06, "loss": 0.0925, "step": 192000 }, { "epoch": 72.3, "learning_rate": 5.540082800150546e-06, "loss": 0.0927, "step": 192100 }, { "epoch": 72.34, "learning_rate": 5.532555513737297e-06, "loss": 0.0923, "step": 192200 }, { "epoch": 72.37, "learning_rate": 5.52502822732405e-06, "loss": 0.0924, "step": 192300 }, { "epoch": 72.41, "learning_rate": 5.5175009409108015e-06, "loss": 0.0919, "step": 192400 }, { "epoch": 72.45, "learning_rate": 5.509973654497555e-06, "loss": 0.0937, "step": 192500 }, { "epoch": 72.49, "learning_rate": 5.5024463680843066e-06, "loss": 0.0919, "step": 192600 }, { "epoch": 72.53, "learning_rate": 5.494919081671058e-06, "loss": 0.0922, "step": 192700 }, { "epoch": 72.56, "learning_rate": 5.487391795257811e-06, "loss": 0.0925, "step": 192800 }, { "epoch": 72.6, "learning_rate": 5.479864508844562e-06, "loss": 0.0919, "step": 192900 }, { "epoch": 72.64, "learning_rate": 5.472337222431314e-06, "loss": 0.0908, "step": 193000 }, { "epoch": 72.68, "learning_rate": 5.464809936018067e-06, "loss": 0.0925, "step": 193100 }, { "epoch": 72.71, "learning_rate": 5.457282649604818e-06, "loss": 0.0921, "step": 193200 }, { "epoch": 72.75, "learning_rate": 5.44975536319157e-06, "loss": 0.0909, "step": 193300 }, { "epoch": 72.79, "learning_rate": 5.442228076778322e-06, "loss": 0.0938, "step": 193400 }, { "epoch": 72.83, "learning_rate": 5.434700790365074e-06, "loss": 0.0919, "step": 193500 }, { "epoch": 72.86, "learning_rate": 5.427173503951826e-06, "loss": 0.0941, "step": 193600 }, { "epoch": 72.9, "learning_rate": 5.4196462175385774e-06, "loss": 0.0916, "step": 193700 }, { "epoch": 72.94, "learning_rate": 5.41211893112533e-06, "loss": 0.0922, "step": 193800 }, { "epoch": 72.98, "learning_rate": 5.404591644712082e-06, "loss": 0.0916, "step": 193900 }, { "epoch": 73.0, "eval_loss": 0.09231603145599365, "eval_runtime": 45.0139, "eval_samples_per_second": 166.615, "eval_steps_per_second": 10.419, "step": 193961 }, { "epoch": 73.01, "learning_rate": 5.397064358298833e-06, "loss": 0.093, "step": 194000 }, { "epoch": 73.05, "learning_rate": 5.389537071885586e-06, "loss": 0.0926, "step": 194100 }, { "epoch": 73.09, "learning_rate": 5.3820097854723375e-06, "loss": 0.0935, "step": 194200 }, { "epoch": 73.13, "learning_rate": 5.374482499059089e-06, "loss": 0.0924, "step": 194300 }, { "epoch": 73.17, "learning_rate": 5.366955212645842e-06, "loss": 0.0918, "step": 194400 }, { "epoch": 73.2, "learning_rate": 5.359427926232593e-06, "loss": 0.0929, "step": 194500 }, { "epoch": 73.24, "learning_rate": 5.351900639819345e-06, "loss": 0.0932, "step": 194600 }, { "epoch": 73.28, "learning_rate": 5.3443733534060975e-06, "loss": 0.092, "step": 194700 }, { "epoch": 73.32, "learning_rate": 5.336846066992849e-06, "loss": 0.0913, "step": 194800 }, { "epoch": 73.35, "learning_rate": 5.329318780579601e-06, "loss": 0.093, "step": 194900 }, { "epoch": 73.39, "learning_rate": 5.321791494166354e-06, "loss": 0.0922, "step": 195000 }, { "epoch": 73.43, "learning_rate": 5.314264207753106e-06, "loss": 0.0898, "step": 195100 }, { "epoch": 73.47, "learning_rate": 5.3067369213398575e-06, "loss": 0.0921, "step": 195200 }, { "epoch": 73.5, "learning_rate": 5.29920963492661e-06, "loss": 0.0927, "step": 195300 }, { "epoch": 73.54, "learning_rate": 5.291682348513362e-06, "loss": 0.0932, "step": 195400 }, { "epoch": 73.58, "learning_rate": 5.284155062100113e-06, "loss": 0.092, "step": 195500 }, { "epoch": 73.62, "learning_rate": 5.276627775686866e-06, "loss": 0.091, "step": 195600 }, { "epoch": 73.65, "learning_rate": 5.2691004892736175e-06, "loss": 0.093, "step": 195700 }, { "epoch": 73.69, "learning_rate": 5.261573202860369e-06, "loss": 0.0943, "step": 195800 }, { "epoch": 73.73, "learning_rate": 5.254045916447122e-06, "loss": 0.0913, "step": 195900 }, { "epoch": 73.77, "learning_rate": 5.246518630033873e-06, "loss": 0.0905, "step": 196000 }, { "epoch": 73.81, "learning_rate": 5.238991343620625e-06, "loss": 0.0905, "step": 196100 }, { "epoch": 73.84, "learning_rate": 5.2314640572073776e-06, "loss": 0.0923, "step": 196200 }, { "epoch": 73.88, "learning_rate": 5.223936770794129e-06, "loss": 0.0923, "step": 196300 }, { "epoch": 73.92, "learning_rate": 5.216409484380881e-06, "loss": 0.0927, "step": 196400 }, { "epoch": 73.96, "learning_rate": 5.2088821979676326e-06, "loss": 0.093, "step": 196500 }, { "epoch": 73.99, "learning_rate": 5.201354911554385e-06, "loss": 0.0927, "step": 196600 }, { "epoch": 74.0, "eval_loss": 0.0920698270201683, "eval_runtime": 45.1732, "eval_samples_per_second": 166.028, "eval_steps_per_second": 10.382, "step": 196618 }, { "epoch": 74.03, "learning_rate": 5.193827625141137e-06, "loss": 0.0911, "step": 196700 }, { "epoch": 74.07, "learning_rate": 5.186300338727888e-06, "loss": 0.0921, "step": 196800 }, { "epoch": 74.11, "learning_rate": 5.178773052314641e-06, "loss": 0.0924, "step": 196900 }, { "epoch": 74.14, "learning_rate": 5.171245765901393e-06, "loss": 0.0937, "step": 197000 }, { "epoch": 74.18, "learning_rate": 5.163718479488144e-06, "loss": 0.0927, "step": 197100 }, { "epoch": 74.22, "learning_rate": 5.156191193074897e-06, "loss": 0.0932, "step": 197200 }, { "epoch": 74.26, "learning_rate": 5.1486639066616484e-06, "loss": 0.091, "step": 197300 }, { "epoch": 74.29, "learning_rate": 5.1411366202484e-06, "loss": 0.0914, "step": 197400 }, { "epoch": 74.33, "learning_rate": 5.1336093338351535e-06, "loss": 0.0921, "step": 197500 }, { "epoch": 74.37, "learning_rate": 5.126082047421905e-06, "loss": 0.0932, "step": 197600 }, { "epoch": 74.41, "learning_rate": 5.118554761008658e-06, "loss": 0.0928, "step": 197700 }, { "epoch": 74.44, "learning_rate": 5.111027474595409e-06, "loss": 0.0918, "step": 197800 }, { "epoch": 74.48, "learning_rate": 5.103500188182161e-06, "loss": 0.0894, "step": 197900 }, { "epoch": 74.52, "learning_rate": 5.095972901768913e-06, "loss": 0.091, "step": 198000 }, { "epoch": 74.56, "learning_rate": 5.088445615355665e-06, "loss": 0.0929, "step": 198100 }, { "epoch": 74.6, "learning_rate": 5.080918328942417e-06, "loss": 0.0931, "step": 198200 }, { "epoch": 74.63, "learning_rate": 5.0733910425291685e-06, "loss": 0.092, "step": 198300 }, { "epoch": 74.67, "learning_rate": 5.065863756115921e-06, "loss": 0.0916, "step": 198400 }, { "epoch": 74.71, "learning_rate": 5.058336469702673e-06, "loss": 0.0914, "step": 198500 }, { "epoch": 74.75, "learning_rate": 5.050809183289424e-06, "loss": 0.0929, "step": 198600 }, { "epoch": 74.78, "learning_rate": 5.043281896876177e-06, "loss": 0.0913, "step": 198700 }, { "epoch": 74.82, "learning_rate": 5.0357546104629285e-06, "loss": 0.0918, "step": 198800 }, { "epoch": 74.86, "learning_rate": 5.02822732404968e-06, "loss": 0.0922, "step": 198900 }, { "epoch": 74.9, "learning_rate": 5.020700037636433e-06, "loss": 0.0918, "step": 199000 }, { "epoch": 74.93, "learning_rate": 5.013172751223184e-06, "loss": 0.0918, "step": 199100 }, { "epoch": 74.97, "learning_rate": 5.005645464809936e-06, "loss": 0.0907, "step": 199200 }, { "epoch": 75.0, "eval_loss": 0.09217877686023712, "eval_runtime": 44.7295, "eval_samples_per_second": 167.675, "eval_steps_per_second": 10.485, "step": 199275 }, { "epoch": 75.01, "learning_rate": 4.9981181783966885e-06, "loss": 0.0917, "step": 199300 }, { "epoch": 75.05, "learning_rate": 4.99059089198344e-06, "loss": 0.0921, "step": 199400 }, { "epoch": 75.08, "learning_rate": 4.983063605570193e-06, "loss": 0.0925, "step": 199500 }, { "epoch": 75.12, "learning_rate": 4.975536319156944e-06, "loss": 0.0928, "step": 199600 }, { "epoch": 75.16, "learning_rate": 4.968009032743696e-06, "loss": 0.092, "step": 199700 }, { "epoch": 75.2, "learning_rate": 4.9604817463304486e-06, "loss": 0.0913, "step": 199800 }, { "epoch": 75.24, "learning_rate": 4.9529544599172e-06, "loss": 0.0928, "step": 199900 }, { "epoch": 75.27, "learning_rate": 4.945427173503952e-06, "loss": 0.0917, "step": 200000 }, { "epoch": 75.31, "learning_rate": 4.937899887090704e-06, "loss": 0.0928, "step": 200100 }, { "epoch": 75.35, "learning_rate": 4.930372600677456e-06, "loss": 0.0919, "step": 200200 }, { "epoch": 75.39, "learning_rate": 4.922845314264208e-06, "loss": 0.0919, "step": 200300 }, { "epoch": 75.42, "learning_rate": 4.91531802785096e-06, "loss": 0.092, "step": 200400 }, { "epoch": 75.46, "learning_rate": 4.907790741437712e-06, "loss": 0.0902, "step": 200500 }, { "epoch": 75.5, "learning_rate": 4.9002634550244644e-06, "loss": 0.0911, "step": 200600 }, { "epoch": 75.54, "learning_rate": 4.892736168611216e-06, "loss": 0.093, "step": 200700 }, { "epoch": 75.57, "learning_rate": 4.885208882197968e-06, "loss": 0.0917, "step": 200800 }, { "epoch": 75.61, "learning_rate": 4.87768159578472e-06, "loss": 0.0911, "step": 200900 }, { "epoch": 75.65, "learning_rate": 4.870154309371472e-06, "loss": 0.0927, "step": 201000 }, { "epoch": 75.69, "learning_rate": 4.862627022958224e-06, "loss": 0.0917, "step": 201100 }, { "epoch": 75.72, "learning_rate": 4.855099736544976e-06, "loss": 0.0936, "step": 201200 }, { "epoch": 75.76, "learning_rate": 4.847572450131728e-06, "loss": 0.0938, "step": 201300 }, { "epoch": 75.8, "learning_rate": 4.8400451637184795e-06, "loss": 0.0896, "step": 201400 }, { "epoch": 75.84, "learning_rate": 4.832517877305232e-06, "loss": 0.0934, "step": 201500 }, { "epoch": 75.88, "learning_rate": 4.824990590891984e-06, "loss": 0.0902, "step": 201600 }, { "epoch": 75.91, "learning_rate": 4.817463304478735e-06, "loss": 0.0906, "step": 201700 }, { "epoch": 75.95, "learning_rate": 4.809936018065488e-06, "loss": 0.0906, "step": 201800 }, { "epoch": 75.99, "learning_rate": 4.80240873165224e-06, "loss": 0.0927, "step": 201900 }, { "epoch": 76.0, "eval_loss": 0.09185120463371277, "eval_runtime": 44.8491, "eval_samples_per_second": 167.228, "eval_steps_per_second": 10.457, "step": 201932 }, { "epoch": 76.03, "learning_rate": 4.794881445238992e-06, "loss": 0.0924, "step": 202000 }, { "epoch": 76.06, "learning_rate": 4.787354158825744e-06, "loss": 0.0922, "step": 202100 }, { "epoch": 76.1, "learning_rate": 4.779826872412496e-06, "loss": 0.0933, "step": 202200 }, { "epoch": 76.14, "learning_rate": 4.772299585999248e-06, "loss": 0.0922, "step": 202300 }, { "epoch": 76.18, "learning_rate": 4.7647722995859995e-06, "loss": 0.0916, "step": 202400 }, { "epoch": 76.21, "learning_rate": 4.757245013172751e-06, "loss": 0.0916, "step": 202500 }, { "epoch": 76.25, "learning_rate": 4.749717726759504e-06, "loss": 0.0931, "step": 202600 }, { "epoch": 76.29, "learning_rate": 4.742190440346255e-06, "loss": 0.0907, "step": 202700 }, { "epoch": 76.33, "learning_rate": 4.734663153933007e-06, "loss": 0.092, "step": 202800 }, { "epoch": 76.36, "learning_rate": 4.7271358675197595e-06, "loss": 0.0912, "step": 202900 }, { "epoch": 76.4, "learning_rate": 4.719608581106512e-06, "loss": 0.0924, "step": 203000 }, { "epoch": 76.44, "learning_rate": 4.712081294693264e-06, "loss": 0.0915, "step": 203100 }, { "epoch": 76.48, "learning_rate": 4.704554008280015e-06, "loss": 0.0908, "step": 203200 }, { "epoch": 76.51, "learning_rate": 4.697026721866768e-06, "loss": 0.0919, "step": 203300 }, { "epoch": 76.55, "learning_rate": 4.6894994354535196e-06, "loss": 0.0912, "step": 203400 }, { "epoch": 76.59, "learning_rate": 4.681972149040271e-06, "loss": 0.092, "step": 203500 }, { "epoch": 76.63, "learning_rate": 4.674444862627024e-06, "loss": 0.091, "step": 203600 }, { "epoch": 76.67, "learning_rate": 4.666917576213775e-06, "loss": 0.0928, "step": 203700 }, { "epoch": 76.7, "learning_rate": 4.659390289800527e-06, "loss": 0.0902, "step": 203800 }, { "epoch": 76.74, "learning_rate": 4.651863003387279e-06, "loss": 0.0921, "step": 203900 }, { "epoch": 76.78, "learning_rate": 4.644335716974031e-06, "loss": 0.0899, "step": 204000 }, { "epoch": 76.82, "learning_rate": 4.636808430560783e-06, "loss": 0.0897, "step": 204100 }, { "epoch": 76.85, "learning_rate": 4.629281144147535e-06, "loss": 0.0909, "step": 204200 }, { "epoch": 76.89, "learning_rate": 4.621753857734287e-06, "loss": 0.0933, "step": 204300 }, { "epoch": 76.93, "learning_rate": 4.61422657132104e-06, "loss": 0.0937, "step": 204400 }, { "epoch": 76.97, "learning_rate": 4.606699284907791e-06, "loss": 0.0925, "step": 204500 }, { "epoch": 77.0, "eval_loss": 0.09133084863424301, "eval_runtime": 45.172, "eval_samples_per_second": 166.032, "eval_steps_per_second": 10.383, "step": 204589 }, { "epoch": 77.0, "learning_rate": 4.599171998494543e-06, "loss": 0.0911, "step": 204600 }, { "epoch": 77.04, "learning_rate": 4.5916447120812955e-06, "loss": 0.0936, "step": 204700 }, { "epoch": 77.08, "learning_rate": 4.584117425668047e-06, "loss": 0.0913, "step": 204800 }, { "epoch": 77.12, "learning_rate": 4.576590139254799e-06, "loss": 0.0911, "step": 204900 }, { "epoch": 77.15, "learning_rate": 4.569062852841551e-06, "loss": 0.0921, "step": 205000 }, { "epoch": 77.19, "learning_rate": 4.561535566428303e-06, "loss": 0.0918, "step": 205100 }, { "epoch": 77.23, "learning_rate": 4.554008280015055e-06, "loss": 0.0918, "step": 205200 }, { "epoch": 77.27, "learning_rate": 4.546480993601806e-06, "loss": 0.0922, "step": 205300 }, { "epoch": 77.31, "learning_rate": 4.538953707188559e-06, "loss": 0.0918, "step": 205400 }, { "epoch": 77.34, "learning_rate": 4.531426420775311e-06, "loss": 0.0921, "step": 205500 }, { "epoch": 77.38, "learning_rate": 4.523899134362063e-06, "loss": 0.0906, "step": 205600 }, { "epoch": 77.42, "learning_rate": 4.516371847948815e-06, "loss": 0.0905, "step": 205700 }, { "epoch": 77.46, "learning_rate": 4.508844561535567e-06, "loss": 0.0913, "step": 205800 }, { "epoch": 77.49, "learning_rate": 4.501317275122319e-06, "loss": 0.0928, "step": 205900 }, { "epoch": 77.53, "learning_rate": 4.4937899887090705e-06, "loss": 0.0931, "step": 206000 }, { "epoch": 77.57, "learning_rate": 4.486262702295823e-06, "loss": 0.0925, "step": 206100 }, { "epoch": 77.61, "learning_rate": 4.478735415882575e-06, "loss": 0.0907, "step": 206200 }, { "epoch": 77.64, "learning_rate": 4.471208129469326e-06, "loss": 0.0901, "step": 206300 }, { "epoch": 77.68, "learning_rate": 4.463680843056079e-06, "loss": 0.0925, "step": 206400 }, { "epoch": 77.72, "learning_rate": 4.4561535566428305e-06, "loss": 0.0899, "step": 206500 }, { "epoch": 77.76, "learning_rate": 4.448626270229582e-06, "loss": 0.0911, "step": 206600 }, { "epoch": 77.79, "learning_rate": 4.441098983816335e-06, "loss": 0.0921, "step": 206700 }, { "epoch": 77.83, "learning_rate": 4.433571697403086e-06, "loss": 0.092, "step": 206800 }, { "epoch": 77.87, "learning_rate": 4.426044410989839e-06, "loss": 0.0925, "step": 206900 }, { "epoch": 77.91, "learning_rate": 4.4185171245765906e-06, "loss": 0.0904, "step": 207000 }, { "epoch": 77.95, "learning_rate": 4.410989838163342e-06, "loss": 0.0904, "step": 207100 }, { "epoch": 77.98, "learning_rate": 4.403462551750095e-06, "loss": 0.0921, "step": 207200 }, { "epoch": 78.0, "eval_loss": 0.09170127660036087, "eval_runtime": 44.7794, "eval_samples_per_second": 167.488, "eval_steps_per_second": 10.474, "step": 207246 }, { "epoch": 78.02, "learning_rate": 4.395935265336846e-06, "loss": 0.0911, "step": 207300 }, { "epoch": 78.06, "learning_rate": 4.388407978923598e-06, "loss": 0.0918, "step": 207400 }, { "epoch": 78.1, "learning_rate": 4.380880692510351e-06, "loss": 0.0918, "step": 207500 }, { "epoch": 78.13, "learning_rate": 4.373353406097102e-06, "loss": 0.0901, "step": 207600 }, { "epoch": 78.17, "learning_rate": 4.365826119683854e-06, "loss": 0.0909, "step": 207700 }, { "epoch": 78.21, "learning_rate": 4.3582988332706064e-06, "loss": 0.0924, "step": 207800 }, { "epoch": 78.25, "learning_rate": 4.350771546857358e-06, "loss": 0.093, "step": 207900 }, { "epoch": 78.28, "learning_rate": 4.343244260444111e-06, "loss": 0.0917, "step": 208000 }, { "epoch": 78.32, "learning_rate": 4.335716974030862e-06, "loss": 0.0918, "step": 208100 }, { "epoch": 78.36, "learning_rate": 4.328189687617615e-06, "loss": 0.0917, "step": 208200 }, { "epoch": 78.4, "learning_rate": 4.3206624012043665e-06, "loss": 0.0918, "step": 208300 }, { "epoch": 78.43, "learning_rate": 4.313135114791118e-06, "loss": 0.0936, "step": 208400 }, { "epoch": 78.47, "learning_rate": 4.30560782837787e-06, "loss": 0.092, "step": 208500 }, { "epoch": 78.51, "learning_rate": 4.298080541964622e-06, "loss": 0.0911, "step": 208600 }, { "epoch": 78.55, "learning_rate": 4.290553255551374e-06, "loss": 0.0909, "step": 208700 }, { "epoch": 78.58, "learning_rate": 4.283025969138126e-06, "loss": 0.0913, "step": 208800 }, { "epoch": 78.62, "learning_rate": 4.275498682724878e-06, "loss": 0.0902, "step": 208900 }, { "epoch": 78.66, "learning_rate": 4.26797139631163e-06, "loss": 0.0917, "step": 209000 }, { "epoch": 78.7, "learning_rate": 4.2604441098983815e-06, "loss": 0.092, "step": 209100 }, { "epoch": 78.74, "learning_rate": 4.252916823485134e-06, "loss": 0.0927, "step": 209200 }, { "epoch": 78.77, "learning_rate": 4.2453895370718865e-06, "loss": 0.093, "step": 209300 }, { "epoch": 78.81, "learning_rate": 4.237862250658638e-06, "loss": 0.0923, "step": 209400 }, { "epoch": 78.85, "learning_rate": 4.23033496424539e-06, "loss": 0.0911, "step": 209500 }, { "epoch": 78.89, "learning_rate": 4.222807677832142e-06, "loss": 0.0914, "step": 209600 }, { "epoch": 78.92, "learning_rate": 4.215280391418894e-06, "loss": 0.092, "step": 209700 }, { "epoch": 78.96, "learning_rate": 4.207753105005646e-06, "loss": 0.0919, "step": 209800 }, { "epoch": 79.0, "learning_rate": 4.200225818592397e-06, "loss": 0.0895, "step": 209900 }, { "epoch": 79.0, "eval_loss": 0.09116315096616745, "eval_runtime": 45.3163, "eval_samples_per_second": 165.503, "eval_steps_per_second": 10.349, "step": 209903 }, { "epoch": 79.04, "learning_rate": 4.19269853217915e-06, "loss": 0.0912, "step": 210000 }, { "epoch": 79.07, "learning_rate": 4.1851712457659015e-06, "loss": 0.0909, "step": 210100 }, { "epoch": 79.11, "learning_rate": 4.177643959352653e-06, "loss": 0.0911, "step": 210200 }, { "epoch": 79.15, "learning_rate": 4.170116672939406e-06, "loss": 0.0925, "step": 210300 }, { "epoch": 79.19, "learning_rate": 4.162589386526157e-06, "loss": 0.0926, "step": 210400 }, { "epoch": 79.22, "learning_rate": 4.15506210011291e-06, "loss": 0.0913, "step": 210500 }, { "epoch": 79.26, "learning_rate": 4.1475348136996616e-06, "loss": 0.0907, "step": 210600 }, { "epoch": 79.3, "learning_rate": 4.140007527286414e-06, "loss": 0.0912, "step": 210700 }, { "epoch": 79.34, "learning_rate": 4.132480240873166e-06, "loss": 0.0906, "step": 210800 }, { "epoch": 79.38, "learning_rate": 4.124952954459917e-06, "loss": 0.0914, "step": 210900 }, { "epoch": 79.41, "learning_rate": 4.11742566804667e-06, "loss": 0.0916, "step": 211000 }, { "epoch": 79.45, "learning_rate": 4.109898381633422e-06, "loss": 0.0909, "step": 211100 }, { "epoch": 79.49, "learning_rate": 4.102371095220173e-06, "loss": 0.0917, "step": 211200 }, { "epoch": 79.53, "learning_rate": 4.094843808806925e-06, "loss": 0.0917, "step": 211300 }, { "epoch": 79.56, "learning_rate": 4.0873165223936774e-06, "loss": 0.0915, "step": 211400 }, { "epoch": 79.6, "learning_rate": 4.079789235980429e-06, "loss": 0.091, "step": 211500 }, { "epoch": 79.64, "learning_rate": 4.072261949567181e-06, "loss": 0.0915, "step": 211600 }, { "epoch": 79.68, "learning_rate": 4.064734663153933e-06, "loss": 0.0919, "step": 211700 }, { "epoch": 79.71, "learning_rate": 4.057207376740686e-06, "loss": 0.0909, "step": 211800 }, { "epoch": 79.75, "learning_rate": 4.0496800903274375e-06, "loss": 0.0915, "step": 211900 }, { "epoch": 79.79, "learning_rate": 4.042152803914189e-06, "loss": 0.0904, "step": 212000 }, { "epoch": 79.83, "learning_rate": 4.034625517500942e-06, "loss": 0.09, "step": 212100 }, { "epoch": 79.86, "learning_rate": 4.027098231087693e-06, "loss": 0.0918, "step": 212200 }, { "epoch": 79.9, "learning_rate": 4.019570944674445e-06, "loss": 0.0912, "step": 212300 }, { "epoch": 79.94, "learning_rate": 4.0120436582611975e-06, "loss": 0.0903, "step": 212400 }, { "epoch": 79.98, "learning_rate": 4.004516371847949e-06, "loss": 0.0916, "step": 212500 }, { "epoch": 80.0, "eval_loss": 0.09135947376489639, "eval_runtime": 45.1657, "eval_samples_per_second": 166.055, "eval_steps_per_second": 10.384, "step": 212560 }, { "epoch": 80.02, "learning_rate": 3.996989085434701e-06, "loss": 0.0909, "step": 212600 }, { "epoch": 80.05, "learning_rate": 3.989461799021453e-06, "loss": 0.0912, "step": 212700 }, { "epoch": 80.09, "learning_rate": 3.981934512608205e-06, "loss": 0.091, "step": 212800 }, { "epoch": 80.13, "learning_rate": 3.974407226194957e-06, "loss": 0.0916, "step": 212900 }, { "epoch": 80.17, "learning_rate": 3.966879939781709e-06, "loss": 0.0918, "step": 213000 }, { "epoch": 80.2, "learning_rate": 3.959352653368461e-06, "loss": 0.0927, "step": 213100 }, { "epoch": 80.24, "learning_rate": 3.951825366955213e-06, "loss": 0.0928, "step": 213200 }, { "epoch": 80.28, "learning_rate": 3.944298080541965e-06, "loss": 0.0902, "step": 213300 }, { "epoch": 80.32, "learning_rate": 3.936770794128717e-06, "loss": 0.0909, "step": 213400 }, { "epoch": 80.35, "learning_rate": 3.929243507715469e-06, "loss": 0.0909, "step": 213500 }, { "epoch": 80.39, "learning_rate": 3.921716221302221e-06, "loss": 0.089, "step": 213600 }, { "epoch": 80.43, "learning_rate": 3.9141889348889725e-06, "loss": 0.0928, "step": 213700 }, { "epoch": 80.47, "learning_rate": 3.906661648475725e-06, "loss": 0.0895, "step": 213800 }, { "epoch": 80.5, "learning_rate": 3.899134362062477e-06, "loss": 0.091, "step": 213900 }, { "epoch": 80.54, "learning_rate": 3.891607075649228e-06, "loss": 0.0908, "step": 214000 }, { "epoch": 80.58, "learning_rate": 3.884079789235981e-06, "loss": 0.0921, "step": 214100 }, { "epoch": 80.62, "learning_rate": 3.8765525028227326e-06, "loss": 0.0904, "step": 214200 }, { "epoch": 80.65, "learning_rate": 3.869025216409485e-06, "loss": 0.0914, "step": 214300 }, { "epoch": 80.69, "learning_rate": 3.861497929996237e-06, "loss": 0.0898, "step": 214400 }, { "epoch": 80.73, "learning_rate": 3.853970643582988e-06, "loss": 0.0923, "step": 214500 }, { "epoch": 80.77, "learning_rate": 3.846443357169741e-06, "loss": 0.0934, "step": 214600 }, { "epoch": 80.81, "learning_rate": 3.838916070756493e-06, "loss": 0.0914, "step": 214700 }, { "epoch": 80.84, "learning_rate": 3.831388784343244e-06, "loss": 0.09, "step": 214800 }, { "epoch": 80.88, "learning_rate": 3.823861497929997e-06, "loss": 0.091, "step": 214900 }, { "epoch": 80.92, "learning_rate": 3.8163342115167484e-06, "loss": 0.0907, "step": 215000 }, { "epoch": 80.96, "learning_rate": 3.8088069251035005e-06, "loss": 0.0913, "step": 215100 }, { "epoch": 80.99, "learning_rate": 3.801279638690252e-06, "loss": 0.09, "step": 215200 }, { "epoch": 81.0, "eval_loss": 0.09087579697370529, "eval_runtime": 45.0879, "eval_samples_per_second": 166.342, "eval_steps_per_second": 10.402, "step": 215217 }, { "epoch": 81.03, "learning_rate": 3.7937523522770043e-06, "loss": 0.0912, "step": 215300 }, { "epoch": 81.07, "learning_rate": 3.786225065863756e-06, "loss": 0.0922, "step": 215400 }, { "epoch": 81.11, "learning_rate": 3.7786977794505085e-06, "loss": 0.0913, "step": 215500 }, { "epoch": 81.14, "learning_rate": 3.7711704930372606e-06, "loss": 0.0917, "step": 215600 }, { "epoch": 81.18, "learning_rate": 3.7636432066240126e-06, "loss": 0.0921, "step": 215700 }, { "epoch": 81.22, "learning_rate": 3.7561159202107643e-06, "loss": 0.0913, "step": 215800 }, { "epoch": 81.26, "learning_rate": 3.7485886337975164e-06, "loss": 0.0918, "step": 215900 }, { "epoch": 81.29, "learning_rate": 3.7410613473842685e-06, "loss": 0.0917, "step": 216000 }, { "epoch": 81.33, "learning_rate": 3.73353406097102e-06, "loss": 0.0888, "step": 216100 }, { "epoch": 81.37, "learning_rate": 3.7260067745577722e-06, "loss": 0.0896, "step": 216200 }, { "epoch": 81.41, "learning_rate": 3.718479488144524e-06, "loss": 0.0917, "step": 216300 }, { "epoch": 81.45, "learning_rate": 3.710952201731276e-06, "loss": 0.0918, "step": 216400 }, { "epoch": 81.48, "learning_rate": 3.703424915318028e-06, "loss": 0.0908, "step": 216500 }, { "epoch": 81.52, "learning_rate": 3.6958976289047798e-06, "loss": 0.0921, "step": 216600 }, { "epoch": 81.56, "learning_rate": 3.6883703424915323e-06, "loss": 0.0896, "step": 216700 }, { "epoch": 81.6, "learning_rate": 3.6808430560782844e-06, "loss": 0.0909, "step": 216800 }, { "epoch": 81.63, "learning_rate": 3.673315769665036e-06, "loss": 0.0912, "step": 216900 }, { "epoch": 81.67, "learning_rate": 3.665788483251788e-06, "loss": 0.0915, "step": 217000 }, { "epoch": 81.71, "learning_rate": 3.65826119683854e-06, "loss": 0.091, "step": 217100 }, { "epoch": 81.75, "learning_rate": 3.650733910425292e-06, "loss": 0.09, "step": 217200 }, { "epoch": 81.78, "learning_rate": 3.643206624012044e-06, "loss": 0.0912, "step": 217300 }, { "epoch": 81.82, "learning_rate": 3.635679337598796e-06, "loss": 0.0908, "step": 217400 }, { "epoch": 81.86, "learning_rate": 3.6281520511855477e-06, "loss": 0.0902, "step": 217500 }, { "epoch": 81.9, "learning_rate": 3.6206247647723e-06, "loss": 0.0907, "step": 217600 }, { "epoch": 81.93, "learning_rate": 3.6130974783590515e-06, "loss": 0.0892, "step": 217700 }, { "epoch": 81.97, "learning_rate": 3.6055701919458036e-06, "loss": 0.0916, "step": 217800 }, { "epoch": 82.0, "eval_loss": 0.09082730859518051, "eval_runtime": 45.1546, "eval_samples_per_second": 166.096, "eval_steps_per_second": 10.387, "step": 217874 }, { "epoch": 82.01, "learning_rate": 3.598042905532556e-06, "loss": 0.0915, "step": 217900 }, { "epoch": 82.05, "learning_rate": 3.590515619119308e-06, "loss": 0.091, "step": 218000 }, { "epoch": 82.09, "learning_rate": 3.58298833270606e-06, "loss": 0.0918, "step": 218100 }, { "epoch": 82.12, "learning_rate": 3.575461046292812e-06, "loss": 0.0911, "step": 218200 }, { "epoch": 82.16, "learning_rate": 3.5679337598795636e-06, "loss": 0.0907, "step": 218300 }, { "epoch": 82.2, "learning_rate": 3.5604064734663157e-06, "loss": 0.0912, "step": 218400 }, { "epoch": 82.24, "learning_rate": 3.5528791870530678e-06, "loss": 0.0915, "step": 218500 }, { "epoch": 82.27, "learning_rate": 3.5453519006398194e-06, "loss": 0.0909, "step": 218600 }, { "epoch": 82.31, "learning_rate": 3.5378246142265715e-06, "loss": 0.0915, "step": 218700 }, { "epoch": 82.35, "learning_rate": 3.5302973278133236e-06, "loss": 0.0895, "step": 218800 }, { "epoch": 82.39, "learning_rate": 3.5227700414000753e-06, "loss": 0.0909, "step": 218900 }, { "epoch": 82.42, "learning_rate": 3.5152427549868274e-06, "loss": 0.0922, "step": 219000 }, { "epoch": 82.46, "learning_rate": 3.507715468573579e-06, "loss": 0.0923, "step": 219100 }, { "epoch": 82.5, "learning_rate": 3.5001881821603316e-06, "loss": 0.0923, "step": 219200 }, { "epoch": 82.54, "learning_rate": 3.4926608957470836e-06, "loss": 0.0904, "step": 219300 }, { "epoch": 82.57, "learning_rate": 3.4851336093338357e-06, "loss": 0.0904, "step": 219400 }, { "epoch": 82.61, "learning_rate": 3.4776063229205874e-06, "loss": 0.0921, "step": 219500 }, { "epoch": 82.65, "learning_rate": 3.4700790365073395e-06, "loss": 0.0904, "step": 219600 }, { "epoch": 82.69, "learning_rate": 3.4625517500940916e-06, "loss": 0.0903, "step": 219700 }, { "epoch": 82.72, "learning_rate": 3.4550244636808433e-06, "loss": 0.0911, "step": 219800 }, { "epoch": 82.76, "learning_rate": 3.4474971772675953e-06, "loss": 0.0904, "step": 219900 }, { "epoch": 82.8, "learning_rate": 3.439969890854347e-06, "loss": 0.0898, "step": 220000 }, { "epoch": 82.84, "learning_rate": 3.432442604441099e-06, "loss": 0.0906, "step": 220100 }, { "epoch": 82.88, "learning_rate": 3.424915318027851e-06, "loss": 0.0885, "step": 220200 }, { "epoch": 82.91, "learning_rate": 3.417388031614603e-06, "loss": 0.0917, "step": 220300 }, { "epoch": 82.95, "learning_rate": 3.4098607452013554e-06, "loss": 0.0888, "step": 220400 }, { "epoch": 82.99, "learning_rate": 3.4023334587881075e-06, "loss": 0.0902, "step": 220500 }, { "epoch": 83.0, "eval_loss": 0.09073475003242493, "eval_runtime": 44.912, "eval_samples_per_second": 166.993, "eval_steps_per_second": 10.443, "step": 220531 }, { "epoch": 83.03, "learning_rate": 3.394806172374859e-06, "loss": 0.0911, "step": 220600 }, { "epoch": 83.06, "learning_rate": 3.3872788859616112e-06, "loss": 0.0922, "step": 220700 }, { "epoch": 83.1, "learning_rate": 3.3797515995483633e-06, "loss": 0.0911, "step": 220800 }, { "epoch": 83.14, "learning_rate": 3.372224313135115e-06, "loss": 0.0914, "step": 220900 }, { "epoch": 83.18, "learning_rate": 3.364697026721867e-06, "loss": 0.0912, "step": 221000 }, { "epoch": 83.21, "learning_rate": 3.357169740308619e-06, "loss": 0.091, "step": 221100 }, { "epoch": 83.25, "learning_rate": 3.349642453895371e-06, "loss": 0.0911, "step": 221200 }, { "epoch": 83.29, "learning_rate": 3.342115167482123e-06, "loss": 0.0896, "step": 221300 }, { "epoch": 83.33, "learning_rate": 3.3345878810688746e-06, "loss": 0.0926, "step": 221400 }, { "epoch": 83.36, "learning_rate": 3.3270605946556267e-06, "loss": 0.0896, "step": 221500 }, { "epoch": 83.4, "learning_rate": 3.3195333082423788e-06, "loss": 0.0918, "step": 221600 }, { "epoch": 83.44, "learning_rate": 3.3120060218291313e-06, "loss": 0.0904, "step": 221700 }, { "epoch": 83.48, "learning_rate": 3.304478735415883e-06, "loss": 0.0924, "step": 221800 }, { "epoch": 83.52, "learning_rate": 3.296951449002635e-06, "loss": 0.0898, "step": 221900 }, { "epoch": 83.55, "learning_rate": 3.289424162589387e-06, "loss": 0.0919, "step": 222000 }, { "epoch": 83.59, "learning_rate": 3.2818968761761388e-06, "loss": 0.0902, "step": 222100 }, { "epoch": 83.63, "learning_rate": 3.274369589762891e-06, "loss": 0.0911, "step": 222200 }, { "epoch": 83.67, "learning_rate": 3.2668423033496425e-06, "loss": 0.0907, "step": 222300 }, { "epoch": 83.7, "learning_rate": 3.2593150169363946e-06, "loss": 0.0907, "step": 222400 }, { "epoch": 83.74, "learning_rate": 3.2517877305231467e-06, "loss": 0.0894, "step": 222500 }, { "epoch": 83.78, "learning_rate": 3.2442604441098984e-06, "loss": 0.0912, "step": 222600 }, { "epoch": 83.82, "learning_rate": 3.2367331576966505e-06, "loss": 0.0919, "step": 222700 }, { "epoch": 83.85, "learning_rate": 3.2292058712834026e-06, "loss": 0.0906, "step": 222800 }, { "epoch": 83.89, "learning_rate": 3.2216785848701546e-06, "loss": 0.0902, "step": 222900 }, { "epoch": 83.93, "learning_rate": 3.2141512984569067e-06, "loss": 0.0908, "step": 223000 }, { "epoch": 83.97, "learning_rate": 3.206624012043659e-06, "loss": 0.0911, "step": 223100 }, { "epoch": 84.0, "eval_loss": 0.09099774062633514, "eval_runtime": 45.2441, "eval_samples_per_second": 165.768, "eval_steps_per_second": 10.366, "step": 223188 }, { "epoch": 84.0, "learning_rate": 3.1990967256304105e-06, "loss": 0.0915, "step": 223200 }, { "epoch": 84.04, "learning_rate": 3.1915694392171626e-06, "loss": 0.0904, "step": 223300 }, { "epoch": 84.08, "learning_rate": 3.1840421528039147e-06, "loss": 0.0894, "step": 223400 }, { "epoch": 84.12, "learning_rate": 3.1765148663906663e-06, "loss": 0.0901, "step": 223500 }, { "epoch": 84.16, "learning_rate": 3.1689875799774184e-06, "loss": 0.0901, "step": 223600 }, { "epoch": 84.19, "learning_rate": 3.16146029356417e-06, "loss": 0.091, "step": 223700 }, { "epoch": 84.23, "learning_rate": 3.153933007150922e-06, "loss": 0.0913, "step": 223800 }, { "epoch": 84.27, "learning_rate": 3.1464057207376743e-06, "loss": 0.091, "step": 223900 }, { "epoch": 84.31, "learning_rate": 3.138878434324426e-06, "loss": 0.0907, "step": 224000 }, { "epoch": 84.34, "learning_rate": 3.131351147911178e-06, "loss": 0.0913, "step": 224100 }, { "epoch": 84.38, "learning_rate": 3.1238238614979305e-06, "loss": 0.0898, "step": 224200 }, { "epoch": 84.42, "learning_rate": 3.1162965750846822e-06, "loss": 0.0897, "step": 224300 }, { "epoch": 84.46, "learning_rate": 3.1087692886714343e-06, "loss": 0.0897, "step": 224400 }, { "epoch": 84.49, "learning_rate": 3.1012420022581864e-06, "loss": 0.0915, "step": 224500 }, { "epoch": 84.53, "learning_rate": 3.093714715844938e-06, "loss": 0.0916, "step": 224600 }, { "epoch": 84.57, "learning_rate": 3.08618742943169e-06, "loss": 0.0932, "step": 224700 }, { "epoch": 84.61, "learning_rate": 3.0786601430184422e-06, "loss": 0.0909, "step": 224800 }, { "epoch": 84.64, "learning_rate": 3.071132856605194e-06, "loss": 0.0897, "step": 224900 }, { "epoch": 84.68, "learning_rate": 3.063605570191946e-06, "loss": 0.0914, "step": 225000 }, { "epoch": 84.72, "learning_rate": 3.0560782837786977e-06, "loss": 0.0891, "step": 225100 }, { "epoch": 84.76, "learning_rate": 3.0485509973654498e-06, "loss": 0.0896, "step": 225200 }, { "epoch": 84.79, "learning_rate": 3.041023710952202e-06, "loss": 0.0911, "step": 225300 }, { "epoch": 84.83, "learning_rate": 3.0334964245389544e-06, "loss": 0.0931, "step": 225400 }, { "epoch": 84.87, "learning_rate": 3.025969138125706e-06, "loss": 0.0922, "step": 225500 }, { "epoch": 84.91, "learning_rate": 3.018441851712458e-06, "loss": 0.0914, "step": 225600 }, { "epoch": 84.95, "learning_rate": 3.01091456529921e-06, "loss": 0.0914, "step": 225700 }, { "epoch": 84.98, "learning_rate": 3.003387278885962e-06, "loss": 0.091, "step": 225800 }, { "epoch": 85.0, "eval_loss": 0.0903320163488388, "eval_runtime": 45.2024, "eval_samples_per_second": 165.921, "eval_steps_per_second": 10.376, "step": 225845 }, { "epoch": 85.02, "learning_rate": 2.995859992472714e-06, "loss": 0.0899, "step": 225900 }, { "epoch": 85.06, "learning_rate": 2.9883327060594656e-06, "loss": 0.0913, "step": 226000 }, { "epoch": 85.1, "learning_rate": 2.9808054196462177e-06, "loss": 0.091, "step": 226100 }, { "epoch": 85.13, "learning_rate": 2.97327813323297e-06, "loss": 0.0931, "step": 226200 }, { "epoch": 85.17, "learning_rate": 2.9657508468197215e-06, "loss": 0.0925, "step": 226300 }, { "epoch": 85.21, "learning_rate": 2.9582235604064736e-06, "loss": 0.0903, "step": 226400 }, { "epoch": 85.25, "learning_rate": 2.9506962739932257e-06, "loss": 0.0908, "step": 226500 }, { "epoch": 85.28, "learning_rate": 2.9431689875799777e-06, "loss": 0.091, "step": 226600 }, { "epoch": 85.32, "learning_rate": 2.93564170116673e-06, "loss": 0.0901, "step": 226700 }, { "epoch": 85.36, "learning_rate": 2.928114414753482e-06, "loss": 0.0907, "step": 226800 }, { "epoch": 85.4, "learning_rate": 2.9205871283402336e-06, "loss": 0.0912, "step": 226900 }, { "epoch": 85.43, "learning_rate": 2.9130598419269857e-06, "loss": 0.0904, "step": 227000 }, { "epoch": 85.47, "learning_rate": 2.9055325555137378e-06, "loss": 0.0913, "step": 227100 }, { "epoch": 85.51, "learning_rate": 2.8980052691004894e-06, "loss": 0.0917, "step": 227200 }, { "epoch": 85.55, "learning_rate": 2.8904779826872415e-06, "loss": 0.0897, "step": 227300 }, { "epoch": 85.59, "learning_rate": 2.882950696273993e-06, "loss": 0.09, "step": 227400 }, { "epoch": 85.62, "learning_rate": 2.8754234098607453e-06, "loss": 0.0913, "step": 227500 }, { "epoch": 85.66, "learning_rate": 2.8678961234474974e-06, "loss": 0.0899, "step": 227600 }, { "epoch": 85.7, "learning_rate": 2.860368837034249e-06, "loss": 0.0905, "step": 227700 }, { "epoch": 85.74, "learning_rate": 2.852841550621001e-06, "loss": 0.0898, "step": 227800 }, { "epoch": 85.77, "learning_rate": 2.8453142642077536e-06, "loss": 0.09, "step": 227900 }, { "epoch": 85.81, "learning_rate": 2.8377869777945053e-06, "loss": 0.0908, "step": 228000 }, { "epoch": 85.85, "learning_rate": 2.8302596913812574e-06, "loss": 0.0906, "step": 228100 }, { "epoch": 85.89, "learning_rate": 2.8227324049680095e-06, "loss": 0.0904, "step": 228200 }, { "epoch": 85.92, "learning_rate": 2.815205118554761e-06, "loss": 0.0897, "step": 228300 }, { "epoch": 85.96, "learning_rate": 2.8076778321415132e-06, "loss": 0.091, "step": 228400 }, { "epoch": 86.0, "learning_rate": 2.8001505457282653e-06, "loss": 0.0903, "step": 228500 }, { "epoch": 86.0, "eval_loss": 0.0905364602804184, "eval_runtime": 45.2173, "eval_samples_per_second": 165.866, "eval_steps_per_second": 10.372, "step": 228502 }, { "epoch": 86.04, "learning_rate": 2.792623259315017e-06, "loss": 0.0907, "step": 228600 }, { "epoch": 86.07, "learning_rate": 2.785095972901769e-06, "loss": 0.0918, "step": 228700 }, { "epoch": 86.11, "learning_rate": 2.7775686864885208e-06, "loss": 0.0925, "step": 228800 }, { "epoch": 86.15, "learning_rate": 2.770041400075273e-06, "loss": 0.0897, "step": 228900 }, { "epoch": 86.19, "learning_rate": 2.762514113662025e-06, "loss": 0.0895, "step": 229000 }, { "epoch": 86.23, "learning_rate": 2.7549868272487774e-06, "loss": 0.088, "step": 229100 }, { "epoch": 86.26, "learning_rate": 2.747459540835529e-06, "loss": 0.0899, "step": 229200 }, { "epoch": 86.3, "learning_rate": 2.739932254422281e-06, "loss": 0.0898, "step": 229300 }, { "epoch": 86.34, "learning_rate": 2.7324049680090333e-06, "loss": 0.0904, "step": 229400 }, { "epoch": 86.38, "learning_rate": 2.724877681595785e-06, "loss": 0.0904, "step": 229500 }, { "epoch": 86.41, "learning_rate": 2.717350395182537e-06, "loss": 0.0906, "step": 229600 }, { "epoch": 86.45, "learning_rate": 2.7098231087692887e-06, "loss": 0.0899, "step": 229700 }, { "epoch": 86.49, "learning_rate": 2.702295822356041e-06, "loss": 0.0889, "step": 229800 }, { "epoch": 86.53, "learning_rate": 2.694768535942793e-06, "loss": 0.0915, "step": 229900 }, { "epoch": 86.56, "learning_rate": 2.6872412495295446e-06, "loss": 0.09, "step": 230000 }, { "epoch": 86.6, "learning_rate": 2.6797139631162967e-06, "loss": 0.0911, "step": 230100 }, { "epoch": 86.64, "learning_rate": 2.6721866767030487e-06, "loss": 0.0911, "step": 230200 }, { "epoch": 86.68, "learning_rate": 2.6646593902898004e-06, "loss": 0.0914, "step": 230300 }, { "epoch": 86.71, "learning_rate": 2.657132103876553e-06, "loss": 0.0904, "step": 230400 }, { "epoch": 86.75, "learning_rate": 2.649604817463305e-06, "loss": 0.0892, "step": 230500 }, { "epoch": 86.79, "learning_rate": 2.6420775310500567e-06, "loss": 0.0896, "step": 230600 }, { "epoch": 86.83, "learning_rate": 2.6345502446368088e-06, "loss": 0.0902, "step": 230700 }, { "epoch": 86.86, "learning_rate": 2.627022958223561e-06, "loss": 0.0907, "step": 230800 }, { "epoch": 86.9, "learning_rate": 2.6194956718103125e-06, "loss": 0.0904, "step": 230900 }, { "epoch": 86.94, "learning_rate": 2.6119683853970646e-06, "loss": 0.0906, "step": 231000 }, { "epoch": 86.98, "learning_rate": 2.6044410989838163e-06, "loss": 0.0907, "step": 231100 }, { "epoch": 87.0, "eval_loss": 0.09008638560771942, "eval_runtime": 45.2794, "eval_samples_per_second": 165.638, "eval_steps_per_second": 10.358, "step": 231159 }, { "epoch": 87.02, "learning_rate": 2.5969138125705684e-06, "loss": 0.0909, "step": 231200 }, { "epoch": 87.05, "learning_rate": 2.5893865261573205e-06, "loss": 0.0889, "step": 231300 }, { "epoch": 87.09, "learning_rate": 2.581859239744072e-06, "loss": 0.0905, "step": 231400 }, { "epoch": 87.13, "learning_rate": 2.5743319533308242e-06, "loss": 0.0887, "step": 231500 }, { "epoch": 87.17, "learning_rate": 2.5668046669175767e-06, "loss": 0.0914, "step": 231600 }, { "epoch": 87.2, "learning_rate": 2.559277380504329e-06, "loss": 0.0906, "step": 231700 }, { "epoch": 87.24, "learning_rate": 2.5517500940910805e-06, "loss": 0.0909, "step": 231800 }, { "epoch": 87.28, "learning_rate": 2.5442228076778326e-06, "loss": 0.09, "step": 231900 }, { "epoch": 87.32, "learning_rate": 2.5366955212645842e-06, "loss": 0.0888, "step": 232000 }, { "epoch": 87.35, "learning_rate": 2.5291682348513363e-06, "loss": 0.09, "step": 232100 }, { "epoch": 87.39, "learning_rate": 2.5216409484380884e-06, "loss": 0.0895, "step": 232200 }, { "epoch": 87.43, "learning_rate": 2.51411366202484e-06, "loss": 0.0907, "step": 232300 }, { "epoch": 87.47, "learning_rate": 2.506586375611592e-06, "loss": 0.0906, "step": 232400 }, { "epoch": 87.5, "learning_rate": 2.4990590891983443e-06, "loss": 0.0907, "step": 232500 }, { "epoch": 87.54, "learning_rate": 2.4915318027850964e-06, "loss": 0.0907, "step": 232600 }, { "epoch": 87.58, "learning_rate": 2.484004516371848e-06, "loss": 0.0907, "step": 232700 }, { "epoch": 87.62, "learning_rate": 2.4764772299586e-06, "loss": 0.0892, "step": 232800 }, { "epoch": 87.66, "learning_rate": 2.468949943545352e-06, "loss": 0.0908, "step": 232900 }, { "epoch": 87.69, "learning_rate": 2.461422657132104e-06, "loss": 0.0906, "step": 233000 }, { "epoch": 87.73, "learning_rate": 2.453895370718856e-06, "loss": 0.0907, "step": 233100 }, { "epoch": 87.77, "learning_rate": 2.446368084305608e-06, "loss": 0.0905, "step": 233200 }, { "epoch": 87.81, "learning_rate": 2.43884079789236e-06, "loss": 0.0913, "step": 233300 }, { "epoch": 87.84, "learning_rate": 2.431313511479112e-06, "loss": 0.0926, "step": 233400 }, { "epoch": 87.88, "learning_rate": 2.423786225065864e-06, "loss": 0.0924, "step": 233500 }, { "epoch": 87.92, "learning_rate": 2.416258938652616e-06, "loss": 0.0897, "step": 233600 }, { "epoch": 87.96, "learning_rate": 2.4087316522393677e-06, "loss": 0.0922, "step": 233700 }, { "epoch": 87.99, "learning_rate": 2.40120436582612e-06, "loss": 0.0908, "step": 233800 }, { "epoch": 88.0, "eval_loss": 0.0906805768609047, "eval_runtime": 44.9216, "eval_samples_per_second": 166.958, "eval_steps_per_second": 10.44, "step": 233816 }, { "epoch": 88.03, "learning_rate": 2.393677079412872e-06, "loss": 0.0916, "step": 233900 }, { "epoch": 88.07, "learning_rate": 2.386149792999624e-06, "loss": 0.0901, "step": 234000 }, { "epoch": 88.11, "learning_rate": 2.3786225065863756e-06, "loss": 0.0899, "step": 234100 }, { "epoch": 88.14, "learning_rate": 2.3710952201731277e-06, "loss": 0.0909, "step": 234200 }, { "epoch": 88.18, "learning_rate": 2.3635679337598798e-06, "loss": 0.0904, "step": 234300 }, { "epoch": 88.22, "learning_rate": 2.356040647346632e-06, "loss": 0.0914, "step": 234400 }, { "epoch": 88.26, "learning_rate": 2.348513360933384e-06, "loss": 0.091, "step": 234500 }, { "epoch": 88.3, "learning_rate": 2.3409860745201356e-06, "loss": 0.0919, "step": 234600 }, { "epoch": 88.33, "learning_rate": 2.3334587881068877e-06, "loss": 0.091, "step": 234700 }, { "epoch": 88.37, "learning_rate": 2.3259315016936394e-06, "loss": 0.0899, "step": 234800 }, { "epoch": 88.41, "learning_rate": 2.3184042152803915e-06, "loss": 0.0899, "step": 234900 }, { "epoch": 88.45, "learning_rate": 2.3108769288671436e-06, "loss": 0.0899, "step": 235000 }, { "epoch": 88.48, "learning_rate": 2.3033496424538956e-06, "loss": 0.0916, "step": 235100 }, { "epoch": 88.52, "learning_rate": 2.2958223560406477e-06, "loss": 0.0901, "step": 235200 }, { "epoch": 88.56, "learning_rate": 2.2882950696273994e-06, "loss": 0.0898, "step": 235300 }, { "epoch": 88.6, "learning_rate": 2.2807677832141515e-06, "loss": 0.0908, "step": 235400 }, { "epoch": 88.63, "learning_rate": 2.273240496800903e-06, "loss": 0.0891, "step": 235500 }, { "epoch": 88.67, "learning_rate": 2.2657132103876557e-06, "loss": 0.0909, "step": 235600 }, { "epoch": 88.71, "learning_rate": 2.2581859239744073e-06, "loss": 0.0889, "step": 235700 }, { "epoch": 88.75, "learning_rate": 2.2506586375611594e-06, "loss": 0.093, "step": 235800 }, { "epoch": 88.78, "learning_rate": 2.2431313511479115e-06, "loss": 0.0902, "step": 235900 }, { "epoch": 88.82, "learning_rate": 2.235604064734663e-06, "loss": 0.09, "step": 236000 }, { "epoch": 88.86, "learning_rate": 2.2280767783214153e-06, "loss": 0.0893, "step": 236100 }, { "epoch": 88.9, "learning_rate": 2.2205494919081674e-06, "loss": 0.0902, "step": 236200 }, { "epoch": 88.93, "learning_rate": 2.2130222054949194e-06, "loss": 0.0912, "step": 236300 }, { "epoch": 88.97, "learning_rate": 2.205494919081671e-06, "loss": 0.0911, "step": 236400 }, { "epoch": 89.0, "eval_loss": 0.09018085896968842, "eval_runtime": 45.1243, "eval_samples_per_second": 166.207, "eval_steps_per_second": 10.394, "step": 236473 }, { "epoch": 89.01, "learning_rate": 2.197967632668423e-06, "loss": 0.092, "step": 236500 }, { "epoch": 89.05, "learning_rate": 2.1904403462551753e-06, "loss": 0.0904, "step": 236600 }, { "epoch": 89.09, "learning_rate": 2.182913059841927e-06, "loss": 0.09, "step": 236700 }, { "epoch": 89.12, "learning_rate": 2.175385773428679e-06, "loss": 0.0911, "step": 236800 }, { "epoch": 89.16, "learning_rate": 2.167858487015431e-06, "loss": 0.0883, "step": 236900 }, { "epoch": 89.2, "learning_rate": 2.1603312006021832e-06, "loss": 0.0904, "step": 237000 }, { "epoch": 89.24, "learning_rate": 2.152803914188935e-06, "loss": 0.0912, "step": 237100 }, { "epoch": 89.27, "learning_rate": 2.145276627775687e-06, "loss": 0.0892, "step": 237200 }, { "epoch": 89.31, "learning_rate": 2.137749341362439e-06, "loss": 0.0912, "step": 237300 }, { "epoch": 89.35, "learning_rate": 2.1302220549491907e-06, "loss": 0.09, "step": 237400 }, { "epoch": 89.39, "learning_rate": 2.1226947685359433e-06, "loss": 0.0904, "step": 237500 }, { "epoch": 89.42, "learning_rate": 2.115167482122695e-06, "loss": 0.0913, "step": 237600 }, { "epoch": 89.46, "learning_rate": 2.107640195709447e-06, "loss": 0.0904, "step": 237700 }, { "epoch": 89.5, "learning_rate": 2.1001129092961987e-06, "loss": 0.0899, "step": 237800 }, { "epoch": 89.54, "learning_rate": 2.0925856228829508e-06, "loss": 0.0893, "step": 237900 }, { "epoch": 89.57, "learning_rate": 2.085058336469703e-06, "loss": 0.0903, "step": 238000 }, { "epoch": 89.61, "learning_rate": 2.077531050056455e-06, "loss": 0.09, "step": 238100 }, { "epoch": 89.65, "learning_rate": 2.070003763643207e-06, "loss": 0.0903, "step": 238200 }, { "epoch": 89.69, "learning_rate": 2.0624764772299587e-06, "loss": 0.0891, "step": 238300 }, { "epoch": 89.73, "learning_rate": 2.054949190816711e-06, "loss": 0.0892, "step": 238400 }, { "epoch": 89.76, "learning_rate": 2.0474219044034625e-06, "loss": 0.09, "step": 238500 }, { "epoch": 89.8, "learning_rate": 2.0398946179902146e-06, "loss": 0.0908, "step": 238600 }, { "epoch": 89.84, "learning_rate": 2.0323673315769666e-06, "loss": 0.0902, "step": 238700 }, { "epoch": 89.88, "learning_rate": 2.0248400451637187e-06, "loss": 0.0916, "step": 238800 }, { "epoch": 89.91, "learning_rate": 2.017312758750471e-06, "loss": 0.0891, "step": 238900 }, { "epoch": 89.95, "learning_rate": 2.0097854723372225e-06, "loss": 0.0898, "step": 239000 }, { "epoch": 89.99, "learning_rate": 2.0022581859239746e-06, "loss": 0.0905, "step": 239100 }, { "epoch": 90.0, "eval_loss": 0.09060540050268173, "eval_runtime": 45.1371, "eval_samples_per_second": 166.16, "eval_steps_per_second": 10.391, "step": 239130 }, { "epoch": 90.03, "learning_rate": 1.9947308995107267e-06, "loss": 0.0915, "step": 239200 }, { "epoch": 90.06, "learning_rate": 1.9872036130974783e-06, "loss": 0.0896, "step": 239300 }, { "epoch": 90.1, "learning_rate": 1.9796763266842304e-06, "loss": 0.0899, "step": 239400 }, { "epoch": 90.14, "learning_rate": 1.9721490402709825e-06, "loss": 0.091, "step": 239500 }, { "epoch": 90.18, "learning_rate": 1.9646217538577346e-06, "loss": 0.0894, "step": 239600 }, { "epoch": 90.21, "learning_rate": 1.9570944674444863e-06, "loss": 0.0897, "step": 239700 }, { "epoch": 90.25, "learning_rate": 1.9495671810312384e-06, "loss": 0.0905, "step": 239800 }, { "epoch": 90.29, "learning_rate": 1.9420398946179905e-06, "loss": 0.0893, "step": 239900 }, { "epoch": 90.33, "learning_rate": 1.9345126082047425e-06, "loss": 0.0904, "step": 240000 }, { "epoch": 90.37, "learning_rate": 1.926985321791494e-06, "loss": 0.0908, "step": 240100 }, { "epoch": 90.4, "learning_rate": 1.9194580353782463e-06, "loss": 0.0892, "step": 240200 }, { "epoch": 90.44, "learning_rate": 1.9119307489649984e-06, "loss": 0.0906, "step": 240300 }, { "epoch": 90.48, "learning_rate": 1.9044034625517503e-06, "loss": 0.0907, "step": 240400 }, { "epoch": 90.52, "learning_rate": 1.8968761761385021e-06, "loss": 0.0917, "step": 240500 }, { "epoch": 90.55, "learning_rate": 1.8893488897252542e-06, "loss": 0.0902, "step": 240600 }, { "epoch": 90.59, "learning_rate": 1.8818216033120063e-06, "loss": 0.0894, "step": 240700 }, { "epoch": 90.63, "learning_rate": 1.8742943168987582e-06, "loss": 0.0887, "step": 240800 }, { "epoch": 90.67, "learning_rate": 1.86676703048551e-06, "loss": 0.0913, "step": 240900 }, { "epoch": 90.7, "learning_rate": 1.859239744072262e-06, "loss": 0.091, "step": 241000 }, { "epoch": 90.74, "learning_rate": 1.851712457659014e-06, "loss": 0.0892, "step": 241100 }, { "epoch": 90.78, "learning_rate": 1.8441851712457661e-06, "loss": 0.0898, "step": 241200 }, { "epoch": 90.82, "learning_rate": 1.836657884832518e-06, "loss": 0.0888, "step": 241300 }, { "epoch": 90.85, "learning_rate": 1.82913059841927e-06, "loss": 0.0895, "step": 241400 }, { "epoch": 90.89, "learning_rate": 1.821603312006022e-06, "loss": 0.0905, "step": 241500 }, { "epoch": 90.93, "learning_rate": 1.8140760255927739e-06, "loss": 0.0906, "step": 241600 }, { "epoch": 90.97, "learning_rate": 1.8065487391795257e-06, "loss": 0.089, "step": 241700 }, { "epoch": 91.0, "eval_loss": 0.0901167169213295, "eval_runtime": 44.9942, "eval_samples_per_second": 166.688, "eval_steps_per_second": 10.424, "step": 241787 }, { "epoch": 91.0, "learning_rate": 1.799021452766278e-06, "loss": 0.0896, "step": 241800 }, { "epoch": 91.04, "learning_rate": 1.79149416635303e-06, "loss": 0.0903, "step": 241900 }, { "epoch": 91.08, "learning_rate": 1.7839668799397818e-06, "loss": 0.0903, "step": 242000 }, { "epoch": 91.12, "learning_rate": 1.7764395935265339e-06, "loss": 0.0887, "step": 242100 }, { "epoch": 91.16, "learning_rate": 1.7689123071132858e-06, "loss": 0.0893, "step": 242200 }, { "epoch": 91.19, "learning_rate": 1.7613850207000376e-06, "loss": 0.0905, "step": 242300 }, { "epoch": 91.23, "learning_rate": 1.7538577342867895e-06, "loss": 0.0896, "step": 242400 }, { "epoch": 91.27, "learning_rate": 1.7463304478735418e-06, "loss": 0.0897, "step": 242500 }, { "epoch": 91.31, "learning_rate": 1.7388031614602937e-06, "loss": 0.091, "step": 242600 }, { "epoch": 91.34, "learning_rate": 1.7312758750470458e-06, "loss": 0.0905, "step": 242700 }, { "epoch": 91.38, "learning_rate": 1.7237485886337977e-06, "loss": 0.0887, "step": 242800 }, { "epoch": 91.42, "learning_rate": 1.7162213022205495e-06, "loss": 0.0896, "step": 242900 }, { "epoch": 91.46, "learning_rate": 1.7086940158073014e-06, "loss": 0.0905, "step": 243000 }, { "epoch": 91.49, "learning_rate": 1.7011667293940537e-06, "loss": 0.0874, "step": 243100 }, { "epoch": 91.53, "learning_rate": 1.6936394429808056e-06, "loss": 0.0896, "step": 243200 }, { "epoch": 91.57, "learning_rate": 1.6861121565675575e-06, "loss": 0.0888, "step": 243300 }, { "epoch": 91.61, "learning_rate": 1.6785848701543096e-06, "loss": 0.0891, "step": 243400 }, { "epoch": 91.64, "learning_rate": 1.6710575837410615e-06, "loss": 0.0905, "step": 243500 }, { "epoch": 91.68, "learning_rate": 1.6635302973278133e-06, "loss": 0.0915, "step": 243600 }, { "epoch": 91.72, "learning_rate": 1.6560030109145656e-06, "loss": 0.0912, "step": 243700 }, { "epoch": 91.76, "learning_rate": 1.6484757245013175e-06, "loss": 0.0892, "step": 243800 }, { "epoch": 91.8, "learning_rate": 1.6409484380880694e-06, "loss": 0.089, "step": 243900 }, { "epoch": 91.83, "learning_rate": 1.6334211516748213e-06, "loss": 0.0894, "step": 244000 }, { "epoch": 91.87, "learning_rate": 1.6258938652615734e-06, "loss": 0.0902, "step": 244100 }, { "epoch": 91.91, "learning_rate": 1.6183665788483252e-06, "loss": 0.0898, "step": 244200 }, { "epoch": 91.95, "learning_rate": 1.6108392924350773e-06, "loss": 0.0897, "step": 244300 }, { "epoch": 91.98, "learning_rate": 1.6033120060218294e-06, "loss": 0.0908, "step": 244400 }, { "epoch": 92.0, "eval_loss": 0.08964475989341736, "eval_runtime": 43.9599, "eval_samples_per_second": 170.61, "eval_steps_per_second": 10.669, "step": 244444 }, { "epoch": 92.02, "learning_rate": 1.5957847196085813e-06, "loss": 0.0888, "step": 244500 }, { "epoch": 92.06, "learning_rate": 1.5882574331953332e-06, "loss": 0.0873, "step": 244600 }, { "epoch": 92.1, "learning_rate": 1.580730146782085e-06, "loss": 0.091, "step": 244700 }, { "epoch": 92.13, "learning_rate": 1.5732028603688371e-06, "loss": 0.0898, "step": 244800 }, { "epoch": 92.17, "learning_rate": 1.565675573955589e-06, "loss": 0.0895, "step": 244900 }, { "epoch": 92.21, "learning_rate": 1.5581482875423411e-06, "loss": 0.0898, "step": 245000 }, { "epoch": 92.25, "learning_rate": 1.5506210011290932e-06, "loss": 0.0912, "step": 245100 }, { "epoch": 92.28, "learning_rate": 1.543093714715845e-06, "loss": 0.09, "step": 245200 }, { "epoch": 92.32, "learning_rate": 1.535566428302597e-06, "loss": 0.0899, "step": 245300 }, { "epoch": 92.36, "learning_rate": 1.5280391418893488e-06, "loss": 0.0904, "step": 245400 }, { "epoch": 92.4, "learning_rate": 1.520511855476101e-06, "loss": 0.0907, "step": 245500 }, { "epoch": 92.44, "learning_rate": 1.512984569062853e-06, "loss": 0.0901, "step": 245600 }, { "epoch": 92.47, "learning_rate": 1.505457282649605e-06, "loss": 0.0912, "step": 245700 }, { "epoch": 92.51, "learning_rate": 1.497929996236357e-06, "loss": 0.0901, "step": 245800 }, { "epoch": 92.55, "learning_rate": 1.4904027098231089e-06, "loss": 0.091, "step": 245900 }, { "epoch": 92.59, "learning_rate": 1.4828754234098607e-06, "loss": 0.0893, "step": 246000 }, { "epoch": 92.62, "learning_rate": 1.4753481369966128e-06, "loss": 0.0899, "step": 246100 }, { "epoch": 92.66, "learning_rate": 1.467820850583365e-06, "loss": 0.0898, "step": 246200 }, { "epoch": 92.7, "learning_rate": 1.4602935641701168e-06, "loss": 0.0908, "step": 246300 }, { "epoch": 92.74, "learning_rate": 1.4527662777568689e-06, "loss": 0.0909, "step": 246400 }, { "epoch": 92.77, "learning_rate": 1.4452389913436208e-06, "loss": 0.0887, "step": 246500 }, { "epoch": 92.81, "learning_rate": 1.4377117049303726e-06, "loss": 0.089, "step": 246600 }, { "epoch": 92.85, "learning_rate": 1.4301844185171245e-06, "loss": 0.0903, "step": 246700 }, { "epoch": 92.89, "learning_rate": 1.4226571321038768e-06, "loss": 0.0889, "step": 246800 }, { "epoch": 92.92, "learning_rate": 1.4151298456906287e-06, "loss": 0.0894, "step": 246900 }, { "epoch": 92.96, "learning_rate": 1.4076025592773806e-06, "loss": 0.0913, "step": 247000 }, { "epoch": 93.0, "learning_rate": 1.4000752728641327e-06, "loss": 0.0894, "step": 247100 }, { "epoch": 93.0, "eval_loss": 0.08920498192310333, "eval_runtime": 43.79, "eval_samples_per_second": 171.272, "eval_steps_per_second": 10.71, "step": 247101 }, { "epoch": 93.04, "learning_rate": 1.3925479864508845e-06, "loss": 0.0895, "step": 247200 }, { "epoch": 93.07, "learning_rate": 1.3850207000376364e-06, "loss": 0.0895, "step": 247300 }, { "epoch": 93.11, "learning_rate": 1.3774934136243887e-06, "loss": 0.0912, "step": 247400 }, { "epoch": 93.15, "learning_rate": 1.3699661272111406e-06, "loss": 0.0905, "step": 247500 }, { "epoch": 93.19, "learning_rate": 1.3624388407978925e-06, "loss": 0.0893, "step": 247600 }, { "epoch": 93.23, "learning_rate": 1.3549115543846444e-06, "loss": 0.0889, "step": 247700 }, { "epoch": 93.26, "learning_rate": 1.3473842679713964e-06, "loss": 0.0902, "step": 247800 }, { "epoch": 93.3, "learning_rate": 1.3398569815581483e-06, "loss": 0.0891, "step": 247900 }, { "epoch": 93.34, "learning_rate": 1.3323296951449002e-06, "loss": 0.0896, "step": 248000 }, { "epoch": 93.38, "learning_rate": 1.3248024087316525e-06, "loss": 0.0899, "step": 248100 }, { "epoch": 93.41, "learning_rate": 1.3172751223184044e-06, "loss": 0.0898, "step": 248200 }, { "epoch": 93.45, "learning_rate": 1.3097478359051563e-06, "loss": 0.0884, "step": 248300 }, { "epoch": 93.49, "learning_rate": 1.3022205494919081e-06, "loss": 0.0902, "step": 248400 }, { "epoch": 93.53, "learning_rate": 1.2946932630786602e-06, "loss": 0.0923, "step": 248500 }, { "epoch": 93.56, "learning_rate": 1.2871659766654121e-06, "loss": 0.0896, "step": 248600 }, { "epoch": 93.6, "learning_rate": 1.2796386902521644e-06, "loss": 0.0904, "step": 248700 }, { "epoch": 93.64, "learning_rate": 1.2721114038389163e-06, "loss": 0.0911, "step": 248800 }, { "epoch": 93.68, "learning_rate": 1.2645841174256682e-06, "loss": 0.0897, "step": 248900 }, { "epoch": 93.71, "learning_rate": 1.25705683101242e-06, "loss": 0.0898, "step": 249000 }, { "epoch": 93.75, "learning_rate": 1.2495295445991721e-06, "loss": 0.0889, "step": 249100 }, { "epoch": 93.79, "learning_rate": 1.242002258185924e-06, "loss": 0.0892, "step": 249200 }, { "epoch": 93.83, "learning_rate": 1.234474971772676e-06, "loss": 0.0881, "step": 249300 }, { "epoch": 93.87, "learning_rate": 1.226947685359428e-06, "loss": 0.0904, "step": 249400 }, { "epoch": 93.9, "learning_rate": 1.21942039894618e-06, "loss": 0.0894, "step": 249500 }, { "epoch": 93.94, "learning_rate": 1.211893112532932e-06, "loss": 0.0904, "step": 249600 }, { "epoch": 93.98, "learning_rate": 1.2043658261196838e-06, "loss": 0.0899, "step": 249700 }, { "epoch": 94.0, "eval_loss": 0.08932201564311981, "eval_runtime": 43.7672, "eval_samples_per_second": 171.361, "eval_steps_per_second": 10.716, "step": 249758 }, { "epoch": 94.02, "learning_rate": 1.196838539706436e-06, "loss": 0.0896, "step": 249800 }, { "epoch": 94.05, "learning_rate": 1.1893112532931878e-06, "loss": 0.0895, "step": 249900 }, { "epoch": 94.09, "learning_rate": 1.1817839668799399e-06, "loss": 0.0899, "step": 250000 }, { "epoch": 94.13, "learning_rate": 1.174256680466692e-06, "loss": 0.0902, "step": 250100 }, { "epoch": 94.17, "learning_rate": 1.1667293940534439e-06, "loss": 0.0901, "step": 250200 }, { "epoch": 94.2, "learning_rate": 1.1592021076401957e-06, "loss": 0.0885, "step": 250300 }, { "epoch": 94.24, "learning_rate": 1.1516748212269478e-06, "loss": 0.0906, "step": 250400 }, { "epoch": 94.28, "learning_rate": 1.1441475348136997e-06, "loss": 0.0899, "step": 250500 }, { "epoch": 94.32, "learning_rate": 1.1366202484004516e-06, "loss": 0.0894, "step": 250600 }, { "epoch": 94.35, "learning_rate": 1.1290929619872037e-06, "loss": 0.0892, "step": 250700 }, { "epoch": 94.39, "learning_rate": 1.1215656755739558e-06, "loss": 0.0912, "step": 250800 }, { "epoch": 94.43, "learning_rate": 1.1140383891607076e-06, "loss": 0.0871, "step": 250900 }, { "epoch": 94.47, "learning_rate": 1.1065111027474597e-06, "loss": 0.0898, "step": 251000 }, { "epoch": 94.51, "learning_rate": 1.0989838163342116e-06, "loss": 0.0896, "step": 251100 }, { "epoch": 94.54, "learning_rate": 1.0914565299209635e-06, "loss": 0.0914, "step": 251200 }, { "epoch": 94.58, "learning_rate": 1.0839292435077156e-06, "loss": 0.0907, "step": 251300 }, { "epoch": 94.62, "learning_rate": 1.0764019570944674e-06, "loss": 0.0896, "step": 251400 }, { "epoch": 94.66, "learning_rate": 1.0688746706812195e-06, "loss": 0.0888, "step": 251500 }, { "epoch": 94.69, "learning_rate": 1.0613473842679716e-06, "loss": 0.0879, "step": 251600 }, { "epoch": 94.73, "learning_rate": 1.0538200978547235e-06, "loss": 0.0899, "step": 251700 }, { "epoch": 94.77, "learning_rate": 1.0462928114414754e-06, "loss": 0.0903, "step": 251800 }, { "epoch": 94.81, "learning_rate": 1.0387655250282275e-06, "loss": 0.0878, "step": 251900 }, { "epoch": 94.84, "learning_rate": 1.0312382386149794e-06, "loss": 0.0894, "step": 252000 }, { "epoch": 94.88, "learning_rate": 1.0237109522017312e-06, "loss": 0.0901, "step": 252100 }, { "epoch": 94.92, "learning_rate": 1.0161836657884833e-06, "loss": 0.0906, "step": 252200 }, { "epoch": 94.96, "learning_rate": 1.0086563793752354e-06, "loss": 0.0911, "step": 252300 }, { "epoch": 94.99, "learning_rate": 1.0011290929619873e-06, "loss": 0.0899, "step": 252400 }, { "epoch": 95.0, "eval_loss": 0.08966313302516937, "eval_runtime": 43.6014, "eval_samples_per_second": 172.013, "eval_steps_per_second": 10.757, "step": 252415 }, { "epoch": 95.03, "learning_rate": 9.936018065487392e-07, "loss": 0.0903, "step": 252500 }, { "epoch": 95.07, "learning_rate": 9.860745201354913e-07, "loss": 0.0907, "step": 252600 }, { "epoch": 95.11, "learning_rate": 9.785472337222431e-07, "loss": 0.0909, "step": 252700 }, { "epoch": 95.14, "learning_rate": 9.710199473089952e-07, "loss": 0.0911, "step": 252800 }, { "epoch": 95.18, "learning_rate": 9.63492660895747e-07, "loss": 0.0916, "step": 252900 }, { "epoch": 95.22, "learning_rate": 9.559653744824992e-07, "loss": 0.0908, "step": 253000 }, { "epoch": 95.26, "learning_rate": 9.484380880692511e-07, "loss": 0.0907, "step": 253100 }, { "epoch": 95.3, "learning_rate": 9.409108016560032e-07, "loss": 0.0892, "step": 253200 }, { "epoch": 95.33, "learning_rate": 9.33383515242755e-07, "loss": 0.091, "step": 253300 }, { "epoch": 95.37, "learning_rate": 9.25856228829507e-07, "loss": 0.0888, "step": 253400 }, { "epoch": 95.41, "learning_rate": 9.18328942416259e-07, "loss": 0.0892, "step": 253500 }, { "epoch": 95.45, "learning_rate": 9.10801656003011e-07, "loss": 0.09, "step": 253600 }, { "epoch": 95.48, "learning_rate": 9.032743695897629e-07, "loss": 0.0896, "step": 253700 }, { "epoch": 95.52, "learning_rate": 8.95747083176515e-07, "loss": 0.0891, "step": 253800 }, { "epoch": 95.56, "learning_rate": 8.882197967632669e-07, "loss": 0.0897, "step": 253900 }, { "epoch": 95.6, "learning_rate": 8.806925103500188e-07, "loss": 0.0893, "step": 254000 }, { "epoch": 95.63, "learning_rate": 8.731652239367709e-07, "loss": 0.0901, "step": 254100 }, { "epoch": 95.67, "learning_rate": 8.656379375235229e-07, "loss": 0.0887, "step": 254200 }, { "epoch": 95.71, "learning_rate": 8.581106511102748e-07, "loss": 0.0886, "step": 254300 }, { "epoch": 95.75, "learning_rate": 8.505833646970269e-07, "loss": 0.0907, "step": 254400 }, { "epoch": 95.78, "learning_rate": 8.430560782837787e-07, "loss": 0.0894, "step": 254500 }, { "epoch": 95.82, "learning_rate": 8.355287918705307e-07, "loss": 0.0899, "step": 254600 }, { "epoch": 95.86, "learning_rate": 8.280015054572828e-07, "loss": 0.0881, "step": 254700 }, { "epoch": 95.9, "learning_rate": 8.204742190440347e-07, "loss": 0.0902, "step": 254800 }, { "epoch": 95.94, "learning_rate": 8.129469326307867e-07, "loss": 0.0895, "step": 254900 }, { "epoch": 95.97, "learning_rate": 8.054196462175387e-07, "loss": 0.0904, "step": 255000 }, { "epoch": 96.0, "eval_loss": 0.0898142084479332, "eval_runtime": 43.5802, "eval_samples_per_second": 172.096, "eval_steps_per_second": 10.762, "step": 255072 }, { "epoch": 96.01, "learning_rate": 7.978923598042906e-07, "loss": 0.0897, "step": 255100 }, { "epoch": 96.05, "learning_rate": 7.903650733910425e-07, "loss": 0.0908, "step": 255200 }, { "epoch": 96.09, "learning_rate": 7.828377869777945e-07, "loss": 0.0905, "step": 255300 }, { "epoch": 96.12, "learning_rate": 7.753105005645466e-07, "loss": 0.0892, "step": 255400 }, { "epoch": 96.16, "learning_rate": 7.677832141512985e-07, "loss": 0.0891, "step": 255500 }, { "epoch": 96.2, "learning_rate": 7.602559277380505e-07, "loss": 0.0898, "step": 255600 }, { "epoch": 96.24, "learning_rate": 7.527286413248026e-07, "loss": 0.0893, "step": 255700 }, { "epoch": 96.27, "learning_rate": 7.452013549115544e-07, "loss": 0.0896, "step": 255800 }, { "epoch": 96.31, "learning_rate": 7.376740684983064e-07, "loss": 0.0897, "step": 255900 }, { "epoch": 96.35, "learning_rate": 7.301467820850584e-07, "loss": 0.0903, "step": 256000 }, { "epoch": 96.39, "learning_rate": 7.226194956718104e-07, "loss": 0.0898, "step": 256100 }, { "epoch": 96.42, "learning_rate": 7.150922092585623e-07, "loss": 0.0895, "step": 256200 }, { "epoch": 96.46, "learning_rate": 7.075649228453143e-07, "loss": 0.0902, "step": 256300 }, { "epoch": 96.5, "learning_rate": 7.000376364320663e-07, "loss": 0.0889, "step": 256400 }, { "epoch": 96.54, "learning_rate": 6.925103500188182e-07, "loss": 0.0893, "step": 256500 }, { "epoch": 96.58, "learning_rate": 6.849830636055703e-07, "loss": 0.0901, "step": 256600 }, { "epoch": 96.61, "learning_rate": 6.774557771923222e-07, "loss": 0.09, "step": 256700 }, { "epoch": 96.65, "learning_rate": 6.699284907790742e-07, "loss": 0.0889, "step": 256800 }, { "epoch": 96.69, "learning_rate": 6.624012043658263e-07, "loss": 0.0887, "step": 256900 }, { "epoch": 96.73, "learning_rate": 6.548739179525781e-07, "loss": 0.089, "step": 257000 }, { "epoch": 96.76, "learning_rate": 6.473466315393301e-07, "loss": 0.0907, "step": 257100 }, { "epoch": 96.8, "learning_rate": 6.398193451260822e-07, "loss": 0.0888, "step": 257200 }, { "epoch": 96.84, "learning_rate": 6.322920587128341e-07, "loss": 0.0882, "step": 257300 }, { "epoch": 96.88, "learning_rate": 6.247647722995861e-07, "loss": 0.0897, "step": 257400 }, { "epoch": 96.91, "learning_rate": 6.17237485886338e-07, "loss": 0.0893, "step": 257500 }, { "epoch": 96.95, "learning_rate": 6.0971019947309e-07, "loss": 0.0892, "step": 257600 }, { "epoch": 96.99, "learning_rate": 6.021829130598419e-07, "loss": 0.0906, "step": 257700 }, { "epoch": 97.0, "eval_loss": 0.08935380727052689, "eval_runtime": 43.4106, "eval_samples_per_second": 172.769, "eval_steps_per_second": 10.804, "step": 257729 }, { "epoch": 97.03, "learning_rate": 5.946556266465939e-07, "loss": 0.0902, "step": 257800 }, { "epoch": 97.06, "learning_rate": 5.87128340233346e-07, "loss": 0.0891, "step": 257900 }, { "epoch": 97.1, "learning_rate": 5.796010538200979e-07, "loss": 0.088, "step": 258000 }, { "epoch": 97.14, "learning_rate": 5.720737674068498e-07, "loss": 0.0905, "step": 258100 }, { "epoch": 97.18, "learning_rate": 5.645464809936018e-07, "loss": 0.088, "step": 258200 }, { "epoch": 97.21, "learning_rate": 5.570191945803538e-07, "loss": 0.0886, "step": 258300 }, { "epoch": 97.25, "learning_rate": 5.494919081671058e-07, "loss": 0.0889, "step": 258400 }, { "epoch": 97.29, "learning_rate": 5.419646217538578e-07, "loss": 0.0877, "step": 258500 }, { "epoch": 97.33, "learning_rate": 5.344373353406098e-07, "loss": 0.0904, "step": 258600 }, { "epoch": 97.37, "learning_rate": 5.269100489273618e-07, "loss": 0.0886, "step": 258700 }, { "epoch": 97.4, "learning_rate": 5.193827625141137e-07, "loss": 0.0896, "step": 258800 }, { "epoch": 97.44, "learning_rate": 5.118554761008656e-07, "loss": 0.0884, "step": 258900 }, { "epoch": 97.48, "learning_rate": 5.043281896876177e-07, "loss": 0.0896, "step": 259000 }, { "epoch": 97.52, "learning_rate": 4.968009032743696e-07, "loss": 0.0886, "step": 259100 }, { "epoch": 97.55, "learning_rate": 4.892736168611216e-07, "loss": 0.0895, "step": 259200 }, { "epoch": 97.59, "learning_rate": 4.817463304478736e-07, "loss": 0.0889, "step": 259300 }, { "epoch": 97.63, "learning_rate": 4.7421904403462554e-07, "loss": 0.0888, "step": 259400 }, { "epoch": 97.67, "learning_rate": 4.666917576213775e-07, "loss": 0.0897, "step": 259500 }, { "epoch": 97.7, "learning_rate": 4.591644712081295e-07, "loss": 0.0903, "step": 259600 }, { "epoch": 97.74, "learning_rate": 4.5163718479488144e-07, "loss": 0.0883, "step": 259700 }, { "epoch": 97.78, "learning_rate": 4.4410989838163347e-07, "loss": 0.0902, "step": 259800 }, { "epoch": 97.82, "learning_rate": 4.3658261196838546e-07, "loss": 0.09, "step": 259900 }, { "epoch": 97.85, "learning_rate": 4.290553255551374e-07, "loss": 0.0883, "step": 260000 }, { "epoch": 97.89, "learning_rate": 4.2152803914188937e-07, "loss": 0.0904, "step": 260100 }, { "epoch": 97.93, "learning_rate": 4.140007527286414e-07, "loss": 0.0891, "step": 260200 }, { "epoch": 97.97, "learning_rate": 4.0647346631539334e-07, "loss": 0.0892, "step": 260300 }, { "epoch": 98.0, "eval_loss": 0.08942902088165283, "eval_runtime": 43.3793, "eval_samples_per_second": 172.893, "eval_steps_per_second": 10.812, "step": 260386 }, { "epoch": 98.01, "learning_rate": 3.989461799021453e-07, "loss": 0.0902, "step": 260400 }, { "epoch": 98.04, "learning_rate": 3.9141889348889725e-07, "loss": 0.0906, "step": 260500 }, { "epoch": 98.08, "learning_rate": 3.8389160707564924e-07, "loss": 0.0889, "step": 260600 }, { "epoch": 98.12, "learning_rate": 3.763643206624013e-07, "loss": 0.0907, "step": 260700 }, { "epoch": 98.16, "learning_rate": 3.688370342491532e-07, "loss": 0.0879, "step": 260800 }, { "epoch": 98.19, "learning_rate": 3.613097478359052e-07, "loss": 0.0877, "step": 260900 }, { "epoch": 98.23, "learning_rate": 3.537824614226572e-07, "loss": 0.0895, "step": 261000 }, { "epoch": 98.27, "learning_rate": 3.462551750094091e-07, "loss": 0.0903, "step": 261100 }, { "epoch": 98.31, "learning_rate": 3.387278885961611e-07, "loss": 0.0897, "step": 261200 }, { "epoch": 98.34, "learning_rate": 3.312006021829131e-07, "loss": 0.0886, "step": 261300 }, { "epoch": 98.38, "learning_rate": 3.2367331576966506e-07, "loss": 0.0882, "step": 261400 }, { "epoch": 98.42, "learning_rate": 3.1614602935641704e-07, "loss": 0.0894, "step": 261500 }, { "epoch": 98.46, "learning_rate": 3.08618742943169e-07, "loss": 0.0901, "step": 261600 }, { "epoch": 98.49, "learning_rate": 3.0109145652992096e-07, "loss": 0.0909, "step": 261700 }, { "epoch": 98.53, "learning_rate": 2.93564170116673e-07, "loss": 0.0897, "step": 261800 }, { "epoch": 98.57, "learning_rate": 2.860368837034249e-07, "loss": 0.0907, "step": 261900 }, { "epoch": 98.61, "learning_rate": 2.785095972901769e-07, "loss": 0.0885, "step": 262000 }, { "epoch": 98.65, "learning_rate": 2.709823108769289e-07, "loss": 0.0898, "step": 262100 }, { "epoch": 98.68, "learning_rate": 2.634550244636809e-07, "loss": 0.0897, "step": 262200 }, { "epoch": 98.72, "learning_rate": 2.559277380504328e-07, "loss": 0.0887, "step": 262300 }, { "epoch": 98.76, "learning_rate": 2.484004516371848e-07, "loss": 0.0888, "step": 262400 }, { "epoch": 98.8, "learning_rate": 2.408731652239368e-07, "loss": 0.0897, "step": 262500 }, { "epoch": 98.83, "learning_rate": 2.3334587881068876e-07, "loss": 0.0894, "step": 262600 }, { "epoch": 98.87, "learning_rate": 2.2581859239744072e-07, "loss": 0.0902, "step": 262700 }, { "epoch": 98.91, "learning_rate": 2.1829130598419273e-07, "loss": 0.0898, "step": 262800 }, { "epoch": 98.95, "learning_rate": 2.1076401957094469e-07, "loss": 0.0908, "step": 262900 }, { "epoch": 98.98, "learning_rate": 2.0323673315769667e-07, "loss": 0.0881, "step": 263000 }, { "epoch": 99.0, "eval_loss": 0.08917281776666641, "eval_runtime": 43.4811, "eval_samples_per_second": 172.489, "eval_steps_per_second": 10.786, "step": 263043 }, { "epoch": 99.02, "learning_rate": 1.9570944674444863e-07, "loss": 0.0909, "step": 263100 }, { "epoch": 99.06, "learning_rate": 1.8818216033120064e-07, "loss": 0.0888, "step": 263200 }, { "epoch": 99.1, "learning_rate": 1.806548739179526e-07, "loss": 0.0897, "step": 263300 }, { "epoch": 99.13, "learning_rate": 1.7312758750470455e-07, "loss": 0.0897, "step": 263400 }, { "epoch": 99.17, "learning_rate": 1.6560030109145656e-07, "loss": 0.0882, "step": 263500 }, { "epoch": 99.21, "learning_rate": 1.5807301467820852e-07, "loss": 0.0889, "step": 263600 }, { "epoch": 99.25, "learning_rate": 1.5054572826496048e-07, "loss": 0.0908, "step": 263700 }, { "epoch": 99.28, "learning_rate": 1.4301844185171246e-07, "loss": 0.088, "step": 263800 }, { "epoch": 99.32, "learning_rate": 1.3549115543846445e-07, "loss": 0.0876, "step": 263900 }, { "epoch": 99.36, "learning_rate": 1.279638690252164e-07, "loss": 0.088, "step": 264000 }, { "epoch": 99.4, "learning_rate": 1.204365826119684e-07, "loss": 0.0892, "step": 264100 }, { "epoch": 99.44, "learning_rate": 1.1290929619872036e-07, "loss": 0.0895, "step": 264200 }, { "epoch": 99.47, "learning_rate": 1.0538200978547234e-07, "loss": 0.0899, "step": 264300 }, { "epoch": 99.51, "learning_rate": 9.785472337222431e-08, "loss": 0.089, "step": 264400 }, { "epoch": 99.55, "learning_rate": 9.03274369589763e-08, "loss": 0.0895, "step": 264500 }, { "epoch": 99.59, "learning_rate": 8.280015054572828e-08, "loss": 0.091, "step": 264600 }, { "epoch": 99.62, "learning_rate": 7.527286413248024e-08, "loss": 0.0896, "step": 264700 }, { "epoch": 99.66, "learning_rate": 6.774557771923222e-08, "loss": 0.0894, "step": 264800 }, { "epoch": 99.7, "learning_rate": 6.02182913059842e-08, "loss": 0.0898, "step": 264900 }, { "epoch": 99.74, "learning_rate": 5.269100489273617e-08, "loss": 0.0882, "step": 265000 }, { "epoch": 99.77, "learning_rate": 4.516371847948815e-08, "loss": 0.0901, "step": 265100 }, { "epoch": 99.81, "learning_rate": 3.763643206624012e-08, "loss": 0.0885, "step": 265200 }, { "epoch": 99.85, "learning_rate": 3.01091456529921e-08, "loss": 0.0901, "step": 265300 }, { "epoch": 99.89, "learning_rate": 2.2581859239744074e-08, "loss": 0.0897, "step": 265400 }, { "epoch": 99.92, "learning_rate": 1.505457282649605e-08, "loss": 0.0902, "step": 265500 }, { "epoch": 99.96, "learning_rate": 7.527286413248024e-09, "loss": 0.0911, "step": 265600 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 0.09, "step": 265700 }, { "epoch": 100.0, "eval_loss": 0.08935302495956421, "eval_runtime": 44.5127, "eval_samples_per_second": 168.491, "eval_steps_per_second": 10.536, "step": 265700 }, { "epoch": 100.0, "step": 265700, "total_flos": 3.31604966375424e+20, "train_loss": 0.10943094944119408, "train_runtime": 65782.603, "train_samples_per_second": 64.607, "train_steps_per_second": 4.039 } ], "max_steps": 265700, "num_train_epochs": 100, "total_flos": 3.31604966375424e+20, "trial_name": null, "trial_params": null }