diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,16767 @@ +{ + "best_metric": 0.08917281776666641, + "best_model_checkpoint": "/content/drive/MyDrive/vit-cifar10/checkpoint-263043", + "epoch": 100.0, + "global_step": 265700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.04, + "learning_rate": 1.9992472713586754e-05, + "loss": 0.3607, + "step": 100 + }, + { + "epoch": 0.08, + "learning_rate": 1.9984945427173507e-05, + "loss": 0.3215, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 1.997741814076026e-05, + "loss": 0.3187, + "step": 300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9969890854347008e-05, + "loss": 0.3123, + "step": 400 + }, + { + "epoch": 0.19, + "learning_rate": 1.996236356793376e-05, + "loss": 0.3007, + "step": 500 + }, + { + "epoch": 0.23, + "learning_rate": 1.9954836281520513e-05, + "loss": 0.3073, + "step": 600 + }, + { + "epoch": 0.26, + "learning_rate": 1.9947308995107266e-05, + "loss": 0.3031, + "step": 700 + }, + { + "epoch": 0.3, + "learning_rate": 1.993978170869402e-05, + "loss": 0.3008, + "step": 800 + }, + { + "epoch": 0.34, + "learning_rate": 1.993225442228077e-05, + "loss": 0.2996, + "step": 900 + }, + { + "epoch": 0.38, + "learning_rate": 1.992472713586752e-05, + "loss": 0.3026, + "step": 1000 + }, + { + "epoch": 0.41, + "learning_rate": 1.9917199849454273e-05, + "loss": 0.3003, + "step": 1100 + }, + { + "epoch": 0.45, + "learning_rate": 1.9909672563041025e-05, + "loss": 0.2994, + "step": 1200 + }, + { + "epoch": 0.49, + "learning_rate": 1.9902145276627778e-05, + "loss": 0.2974, + "step": 1300 + }, + { + "epoch": 0.53, + "learning_rate": 1.989461799021453e-05, + "loss": 0.2973, + "step": 1400 + }, + { + "epoch": 0.56, + "learning_rate": 1.9887090703801283e-05, + "loss": 0.2995, + "step": 1500 + }, + { + "epoch": 0.6, + "learning_rate": 1.987956341738803e-05, + "loss": 0.2945, + "step": 1600 + }, + { + "epoch": 0.64, + "learning_rate": 1.9872036130974784e-05, + "loss": 0.296, + "step": 1700 + }, + { + "epoch": 0.68, + "learning_rate": 1.9864508844561537e-05, + "loss": 0.2904, + "step": 1800 + }, + { + "epoch": 0.72, + "learning_rate": 1.985698155814829e-05, + "loss": 0.2931, + "step": 1900 + }, + { + "epoch": 0.75, + "learning_rate": 1.9849454271735042e-05, + "loss": 0.2952, + "step": 2000 + }, + { + "epoch": 0.79, + "learning_rate": 1.9841926985321794e-05, + "loss": 0.2935, + "step": 2100 + }, + { + "epoch": 0.83, + "learning_rate": 1.9834399698908543e-05, + "loss": 0.2912, + "step": 2200 + }, + { + "epoch": 0.87, + "learning_rate": 1.9826872412495296e-05, + "loss": 0.2951, + "step": 2300 + }, + { + "epoch": 0.9, + "learning_rate": 1.981934512608205e-05, + "loss": 0.2873, + "step": 2400 + }, + { + "epoch": 0.94, + "learning_rate": 1.98118178396688e-05, + "loss": 0.2906, + "step": 2500 + }, + { + "epoch": 0.98, + "learning_rate": 1.9804290553255553e-05, + "loss": 0.289, + "step": 2600 + }, + { + "epoch": 1.0, + "eval_loss": 0.2940625846385956, + "eval_runtime": 45.1121, + "eval_samples_per_second": 166.253, + "eval_steps_per_second": 10.396, + "step": 2657 + }, + { + "epoch": 1.02, + "learning_rate": 1.9796763266842306e-05, + "loss": 0.2831, + "step": 2700 + }, + { + "epoch": 1.05, + "learning_rate": 1.978923598042906e-05, + "loss": 0.2941, + "step": 2800 + }, + { + "epoch": 1.09, + "learning_rate": 1.9781708694015808e-05, + "loss": 0.2866, + "step": 2900 + }, + { + "epoch": 1.13, + "learning_rate": 1.977418140760256e-05, + "loss": 0.2811, + "step": 3000 + }, + { + "epoch": 1.17, + "learning_rate": 1.9766654121189313e-05, + "loss": 0.2912, + "step": 3100 + }, + { + "epoch": 1.2, + "learning_rate": 1.9759126834776065e-05, + "loss": 0.289, + "step": 3200 + }, + { + "epoch": 1.24, + "learning_rate": 1.9751599548362818e-05, + "loss": 0.2907, + "step": 3300 + }, + { + "epoch": 1.28, + "learning_rate": 1.974407226194957e-05, + "loss": 0.2913, + "step": 3400 + }, + { + "epoch": 1.32, + "learning_rate": 1.973654497553632e-05, + "loss": 0.2863, + "step": 3500 + }, + { + "epoch": 1.35, + "learning_rate": 1.9729017689123072e-05, + "loss": 0.2822, + "step": 3600 + }, + { + "epoch": 1.39, + "learning_rate": 1.9721490402709824e-05, + "loss": 0.2808, + "step": 3700 + }, + { + "epoch": 1.43, + "learning_rate": 1.9713963116296577e-05, + "loss": 0.2836, + "step": 3800 + }, + { + "epoch": 1.47, + "learning_rate": 1.970643582988333e-05, + "loss": 0.2796, + "step": 3900 + }, + { + "epoch": 1.51, + "learning_rate": 1.9698908543470082e-05, + "loss": 0.2831, + "step": 4000 + }, + { + "epoch": 1.54, + "learning_rate": 1.969138125705683e-05, + "loss": 0.2878, + "step": 4100 + }, + { + "epoch": 1.58, + "learning_rate": 1.9683853970643583e-05, + "loss": 0.2767, + "step": 4200 + }, + { + "epoch": 1.62, + "learning_rate": 1.9676326684230336e-05, + "loss": 0.2835, + "step": 4300 + }, + { + "epoch": 1.66, + "learning_rate": 1.966879939781709e-05, + "loss": 0.2823, + "step": 4400 + }, + { + "epoch": 1.69, + "learning_rate": 1.966127211140384e-05, + "loss": 0.284, + "step": 4500 + }, + { + "epoch": 1.73, + "learning_rate": 1.9653744824990594e-05, + "loss": 0.2813, + "step": 4600 + }, + { + "epoch": 1.77, + "learning_rate": 1.9646217538577343e-05, + "loss": 0.2806, + "step": 4700 + }, + { + "epoch": 1.81, + "learning_rate": 1.9638690252164095e-05, + "loss": 0.2846, + "step": 4800 + }, + { + "epoch": 1.84, + "learning_rate": 1.9631162965750848e-05, + "loss": 0.2834, + "step": 4900 + }, + { + "epoch": 1.88, + "learning_rate": 1.96236356793376e-05, + "loss": 0.2821, + "step": 5000 + }, + { + "epoch": 1.92, + "learning_rate": 1.9616108392924353e-05, + "loss": 0.2837, + "step": 5100 + }, + { + "epoch": 1.96, + "learning_rate": 1.9608581106511105e-05, + "loss": 0.2782, + "step": 5200 + }, + { + "epoch": 1.99, + "learning_rate": 1.9601053820097858e-05, + "loss": 0.2858, + "step": 5300 + }, + { + "epoch": 2.0, + "eval_loss": 0.28088775277137756, + "eval_runtime": 45.112, + "eval_samples_per_second": 166.253, + "eval_steps_per_second": 10.396, + "step": 5314 + }, + { + "epoch": 2.03, + "learning_rate": 1.9593526533684607e-05, + "loss": 0.2826, + "step": 5400 + }, + { + "epoch": 2.07, + "learning_rate": 1.9585999247271363e-05, + "loss": 0.2858, + "step": 5500 + }, + { + "epoch": 2.11, + "learning_rate": 1.9578471960858112e-05, + "loss": 0.2782, + "step": 5600 + }, + { + "epoch": 2.15, + "learning_rate": 1.9570944674444864e-05, + "loss": 0.2779, + "step": 5700 + }, + { + "epoch": 2.18, + "learning_rate": 1.9563417388031617e-05, + "loss": 0.2792, + "step": 5800 + }, + { + "epoch": 2.22, + "learning_rate": 1.955589010161837e-05, + "loss": 0.2837, + "step": 5900 + }, + { + "epoch": 2.26, + "learning_rate": 1.954836281520512e-05, + "loss": 0.2792, + "step": 6000 + }, + { + "epoch": 2.3, + "learning_rate": 1.954083552879187e-05, + "loss": 0.2825, + "step": 6100 + }, + { + "epoch": 2.33, + "learning_rate": 1.9533308242378624e-05, + "loss": 0.2762, + "step": 6200 + }, + { + "epoch": 2.37, + "learning_rate": 1.9525780955965376e-05, + "loss": 0.283, + "step": 6300 + }, + { + "epoch": 2.41, + "learning_rate": 1.951825366955213e-05, + "loss": 0.2824, + "step": 6400 + }, + { + "epoch": 2.45, + "learning_rate": 1.951072638313888e-05, + "loss": 0.2787, + "step": 6500 + }, + { + "epoch": 2.48, + "learning_rate": 1.950319909672563e-05, + "loss": 0.2769, + "step": 6600 + }, + { + "epoch": 2.52, + "learning_rate": 1.9495671810312383e-05, + "loss": 0.2791, + "step": 6700 + }, + { + "epoch": 2.56, + "learning_rate": 1.9488144523899135e-05, + "loss": 0.2754, + "step": 6800 + }, + { + "epoch": 2.6, + "learning_rate": 1.9480617237485888e-05, + "loss": 0.2721, + "step": 6900 + }, + { + "epoch": 2.63, + "learning_rate": 1.947308995107264e-05, + "loss": 0.2796, + "step": 7000 + }, + { + "epoch": 2.67, + "learning_rate": 1.9465562664659393e-05, + "loss": 0.2771, + "step": 7100 + }, + { + "epoch": 2.71, + "learning_rate": 1.9458035378246142e-05, + "loss": 0.2778, + "step": 7200 + }, + { + "epoch": 2.75, + "learning_rate": 1.9450508091832894e-05, + "loss": 0.2773, + "step": 7300 + }, + { + "epoch": 2.79, + "learning_rate": 1.9442980805419647e-05, + "loss": 0.2771, + "step": 7400 + }, + { + "epoch": 2.82, + "learning_rate": 1.94354535190064e-05, + "loss": 0.2764, + "step": 7500 + }, + { + "epoch": 2.86, + "learning_rate": 1.9427926232593152e-05, + "loss": 0.2789, + "step": 7600 + }, + { + "epoch": 2.9, + "learning_rate": 1.9420398946179905e-05, + "loss": 0.2768, + "step": 7700 + }, + { + "epoch": 2.94, + "learning_rate": 1.9412871659766657e-05, + "loss": 0.2765, + "step": 7800 + }, + { + "epoch": 2.97, + "learning_rate": 1.9405344373353406e-05, + "loss": 0.2693, + "step": 7900 + }, + { + "epoch": 3.0, + "eval_loss": 0.2738477289676666, + "eval_runtime": 44.824, + "eval_samples_per_second": 167.321, + "eval_steps_per_second": 10.463, + "step": 7971 + }, + { + "epoch": 3.01, + "learning_rate": 1.9397817086940162e-05, + "loss": 0.277, + "step": 8000 + }, + { + "epoch": 3.05, + "learning_rate": 1.939028980052691e-05, + "loss": 0.2734, + "step": 8100 + }, + { + "epoch": 3.09, + "learning_rate": 1.9382762514113664e-05, + "loss": 0.2767, + "step": 8200 + }, + { + "epoch": 3.12, + "learning_rate": 1.9375235227700416e-05, + "loss": 0.2767, + "step": 8300 + }, + { + "epoch": 3.16, + "learning_rate": 1.936770794128717e-05, + "loss": 0.271, + "step": 8400 + }, + { + "epoch": 3.2, + "learning_rate": 1.9360180654873918e-05, + "loss": 0.2747, + "step": 8500 + }, + { + "epoch": 3.24, + "learning_rate": 1.9352653368460674e-05, + "loss": 0.2758, + "step": 8600 + }, + { + "epoch": 3.27, + "learning_rate": 1.9345126082047423e-05, + "loss": 0.2706, + "step": 8700 + }, + { + "epoch": 3.31, + "learning_rate": 1.9337598795634175e-05, + "loss": 0.2734, + "step": 8800 + }, + { + "epoch": 3.35, + "learning_rate": 1.9330071509220928e-05, + "loss": 0.2732, + "step": 8900 + }, + { + "epoch": 3.39, + "learning_rate": 1.932254422280768e-05, + "loss": 0.2724, + "step": 9000 + }, + { + "epoch": 3.42, + "learning_rate": 1.931501693639443e-05, + "loss": 0.2678, + "step": 9100 + }, + { + "epoch": 3.46, + "learning_rate": 1.9307489649981182e-05, + "loss": 0.2755, + "step": 9200 + }, + { + "epoch": 3.5, + "learning_rate": 1.9299962363567935e-05, + "loss": 0.264, + "step": 9300 + }, + { + "epoch": 3.54, + "learning_rate": 1.9292435077154687e-05, + "loss": 0.2706, + "step": 9400 + }, + { + "epoch": 3.58, + "learning_rate": 1.928490779074144e-05, + "loss": 0.2686, + "step": 9500 + }, + { + "epoch": 3.61, + "learning_rate": 1.9277380504328192e-05, + "loss": 0.2681, + "step": 9600 + }, + { + "epoch": 3.65, + "learning_rate": 1.926985321791494e-05, + "loss": 0.2671, + "step": 9700 + }, + { + "epoch": 3.69, + "learning_rate": 1.9262325931501694e-05, + "loss": 0.2668, + "step": 9800 + }, + { + "epoch": 3.73, + "learning_rate": 1.9254798645088446e-05, + "loss": 0.2621, + "step": 9900 + }, + { + "epoch": 3.76, + "learning_rate": 1.92472713586752e-05, + "loss": 0.2641, + "step": 10000 + }, + { + "epoch": 3.8, + "learning_rate": 1.923974407226195e-05, + "loss": 0.2645, + "step": 10100 + }, + { + "epoch": 3.84, + "learning_rate": 1.9232216785848704e-05, + "loss": 0.2593, + "step": 10200 + }, + { + "epoch": 3.88, + "learning_rate": 1.9224689499435456e-05, + "loss": 0.2604, + "step": 10300 + }, + { + "epoch": 3.91, + "learning_rate": 1.9217162213022205e-05, + "loss": 0.2614, + "step": 10400 + }, + { + "epoch": 3.95, + "learning_rate": 1.920963492660896e-05, + "loss": 0.2627, + "step": 10500 + }, + { + "epoch": 3.99, + "learning_rate": 1.920210764019571e-05, + "loss": 0.2578, + "step": 10600 + }, + { + "epoch": 4.0, + "eval_loss": 0.2545997202396393, + "eval_runtime": 44.2178, + "eval_samples_per_second": 169.615, + "eval_steps_per_second": 10.607, + "step": 10628 + }, + { + "epoch": 4.03, + "learning_rate": 1.9194580353782463e-05, + "loss": 0.2567, + "step": 10700 + }, + { + "epoch": 4.06, + "learning_rate": 1.9187053067369215e-05, + "loss": 0.2547, + "step": 10800 + }, + { + "epoch": 4.1, + "learning_rate": 1.9179525780955968e-05, + "loss": 0.2564, + "step": 10900 + }, + { + "epoch": 4.14, + "learning_rate": 1.9171998494542717e-05, + "loss": 0.2603, + "step": 11000 + }, + { + "epoch": 4.18, + "learning_rate": 1.9164471208129473e-05, + "loss": 0.2514, + "step": 11100 + }, + { + "epoch": 4.22, + "learning_rate": 1.9156943921716222e-05, + "loss": 0.2571, + "step": 11200 + }, + { + "epoch": 4.25, + "learning_rate": 1.9149416635302975e-05, + "loss": 0.2513, + "step": 11300 + }, + { + "epoch": 4.29, + "learning_rate": 1.9141889348889727e-05, + "loss": 0.2531, + "step": 11400 + }, + { + "epoch": 4.33, + "learning_rate": 1.913436206247648e-05, + "loss": 0.2485, + "step": 11500 + }, + { + "epoch": 4.37, + "learning_rate": 1.912683477606323e-05, + "loss": 0.2519, + "step": 11600 + }, + { + "epoch": 4.4, + "learning_rate": 1.9119307489649985e-05, + "loss": 0.2506, + "step": 11700 + }, + { + "epoch": 4.44, + "learning_rate": 1.9111780203236734e-05, + "loss": 0.2487, + "step": 11800 + }, + { + "epoch": 4.48, + "learning_rate": 1.9104252916823486e-05, + "loss": 0.2456, + "step": 11900 + }, + { + "epoch": 4.52, + "learning_rate": 1.909672563041024e-05, + "loss": 0.2493, + "step": 12000 + }, + { + "epoch": 4.55, + "learning_rate": 1.908919834399699e-05, + "loss": 0.2439, + "step": 12100 + }, + { + "epoch": 4.59, + "learning_rate": 1.908167105758374e-05, + "loss": 0.2434, + "step": 12200 + }, + { + "epoch": 4.63, + "learning_rate": 1.9074143771170493e-05, + "loss": 0.2454, + "step": 12300 + }, + { + "epoch": 4.67, + "learning_rate": 1.9066616484757246e-05, + "loss": 0.2406, + "step": 12400 + }, + { + "epoch": 4.7, + "learning_rate": 1.9059089198343998e-05, + "loss": 0.2392, + "step": 12500 + }, + { + "epoch": 4.74, + "learning_rate": 1.905156191193075e-05, + "loss": 0.2394, + "step": 12600 + }, + { + "epoch": 4.78, + "learning_rate": 1.9044034625517503e-05, + "loss": 0.2351, + "step": 12700 + }, + { + "epoch": 4.82, + "learning_rate": 1.9036507339104256e-05, + "loss": 0.2367, + "step": 12800 + }, + { + "epoch": 4.86, + "learning_rate": 1.9028980052691005e-05, + "loss": 0.233, + "step": 12900 + }, + { + "epoch": 4.89, + "learning_rate": 1.902145276627776e-05, + "loss": 0.2276, + "step": 13000 + }, + { + "epoch": 4.93, + "learning_rate": 1.901392547986451e-05, + "loss": 0.2323, + "step": 13100 + }, + { + "epoch": 4.97, + "learning_rate": 1.9006398193451262e-05, + "loss": 0.2211, + "step": 13200 + }, + { + "epoch": 5.0, + "eval_loss": 0.21532748639583588, + "eval_runtime": 44.0312, + "eval_samples_per_second": 170.334, + "eval_steps_per_second": 10.652, + "step": 13285 + }, + { + "epoch": 5.01, + "learning_rate": 1.8998870907038015e-05, + "loss": 0.2225, + "step": 13300 + }, + { + "epoch": 5.04, + "learning_rate": 1.8991343620624767e-05, + "loss": 0.2211, + "step": 13400 + }, + { + "epoch": 5.08, + "learning_rate": 1.8983816334211516e-05, + "loss": 0.2178, + "step": 13500 + }, + { + "epoch": 5.12, + "learning_rate": 1.8976289047798272e-05, + "loss": 0.2164, + "step": 13600 + }, + { + "epoch": 5.16, + "learning_rate": 1.896876176138502e-05, + "loss": 0.2144, + "step": 13700 + }, + { + "epoch": 5.19, + "learning_rate": 1.8961234474971774e-05, + "loss": 0.2093, + "step": 13800 + }, + { + "epoch": 5.23, + "learning_rate": 1.8953707188558526e-05, + "loss": 0.2087, + "step": 13900 + }, + { + "epoch": 5.27, + "learning_rate": 1.894617990214528e-05, + "loss": 0.2111, + "step": 14000 + }, + { + "epoch": 5.31, + "learning_rate": 1.8938652615732028e-05, + "loss": 0.2041, + "step": 14100 + }, + { + "epoch": 5.34, + "learning_rate": 1.8931125329318784e-05, + "loss": 0.2023, + "step": 14200 + }, + { + "epoch": 5.38, + "learning_rate": 1.8923598042905533e-05, + "loss": 0.2011, + "step": 14300 + }, + { + "epoch": 5.42, + "learning_rate": 1.8916070756492286e-05, + "loss": 0.2028, + "step": 14400 + }, + { + "epoch": 5.46, + "learning_rate": 1.8908543470079038e-05, + "loss": 0.198, + "step": 14500 + }, + { + "epoch": 5.49, + "learning_rate": 1.890101618366579e-05, + "loss": 0.1986, + "step": 14600 + }, + { + "epoch": 5.53, + "learning_rate": 1.889348889725254e-05, + "loss": 0.1959, + "step": 14700 + }, + { + "epoch": 5.57, + "learning_rate": 1.8885961610839296e-05, + "loss": 0.1953, + "step": 14800 + }, + { + "epoch": 5.61, + "learning_rate": 1.8878434324426048e-05, + "loss": 0.193, + "step": 14900 + }, + { + "epoch": 5.65, + "learning_rate": 1.8870907038012797e-05, + "loss": 0.1918, + "step": 15000 + }, + { + "epoch": 5.68, + "learning_rate": 1.886337975159955e-05, + "loss": 0.1911, + "step": 15100 + }, + { + "epoch": 5.72, + "learning_rate": 1.8855852465186302e-05, + "loss": 0.1889, + "step": 15200 + }, + { + "epoch": 5.76, + "learning_rate": 1.8848325178773055e-05, + "loss": 0.1879, + "step": 15300 + }, + { + "epoch": 5.8, + "learning_rate": 1.8840797892359804e-05, + "loss": 0.1844, + "step": 15400 + }, + { + "epoch": 5.83, + "learning_rate": 1.883327060594656e-05, + "loss": 0.1821, + "step": 15500 + }, + { + "epoch": 5.87, + "learning_rate": 1.882574331953331e-05, + "loss": 0.1808, + "step": 15600 + }, + { + "epoch": 5.91, + "learning_rate": 1.881821603312006e-05, + "loss": 0.1863, + "step": 15700 + }, + { + "epoch": 5.95, + "learning_rate": 1.8810688746706814e-05, + "loss": 0.1806, + "step": 15800 + }, + { + "epoch": 5.98, + "learning_rate": 1.8803161460293567e-05, + "loss": 0.1799, + "step": 15900 + }, + { + "epoch": 6.0, + "eval_loss": 0.1794862300157547, + "eval_runtime": 44.3468, + "eval_samples_per_second": 169.121, + "eval_steps_per_second": 10.576, + "step": 15942 + }, + { + "epoch": 6.02, + "learning_rate": 1.8795634173880316e-05, + "loss": 0.1773, + "step": 16000 + }, + { + "epoch": 6.06, + "learning_rate": 1.878810688746707e-05, + "loss": 0.1746, + "step": 16100 + }, + { + "epoch": 6.1, + "learning_rate": 1.878057960105382e-05, + "loss": 0.1772, + "step": 16200 + }, + { + "epoch": 6.13, + "learning_rate": 1.8773052314640573e-05, + "loss": 0.1724, + "step": 16300 + }, + { + "epoch": 6.17, + "learning_rate": 1.8765525028227326e-05, + "loss": 0.17, + "step": 16400 + }, + { + "epoch": 6.21, + "learning_rate": 1.8757997741814078e-05, + "loss": 0.1737, + "step": 16500 + }, + { + "epoch": 6.25, + "learning_rate": 1.8750470455400827e-05, + "loss": 0.1732, + "step": 16600 + }, + { + "epoch": 6.29, + "learning_rate": 1.8742943168987583e-05, + "loss": 0.1689, + "step": 16700 + }, + { + "epoch": 6.32, + "learning_rate": 1.8735415882574332e-05, + "loss": 0.1704, + "step": 16800 + }, + { + "epoch": 6.36, + "learning_rate": 1.8727888596161085e-05, + "loss": 0.1664, + "step": 16900 + }, + { + "epoch": 6.4, + "learning_rate": 1.8720361309747837e-05, + "loss": 0.1651, + "step": 17000 + }, + { + "epoch": 6.44, + "learning_rate": 1.871283402333459e-05, + "loss": 0.1678, + "step": 17100 + }, + { + "epoch": 6.47, + "learning_rate": 1.870530673692134e-05, + "loss": 0.1674, + "step": 17200 + }, + { + "epoch": 6.51, + "learning_rate": 1.8697779450508095e-05, + "loss": 0.1674, + "step": 17300 + }, + { + "epoch": 6.55, + "learning_rate": 1.8690252164094847e-05, + "loss": 0.1643, + "step": 17400 + }, + { + "epoch": 6.59, + "learning_rate": 1.8682724877681597e-05, + "loss": 0.1667, + "step": 17500 + }, + { + "epoch": 6.62, + "learning_rate": 1.867519759126835e-05, + "loss": 0.1601, + "step": 17600 + }, + { + "epoch": 6.66, + "learning_rate": 1.86676703048551e-05, + "loss": 0.1645, + "step": 17700 + }, + { + "epoch": 6.7, + "learning_rate": 1.8660143018441854e-05, + "loss": 0.1637, + "step": 17800 + }, + { + "epoch": 6.74, + "learning_rate": 1.8652615732028607e-05, + "loss": 0.1633, + "step": 17900 + }, + { + "epoch": 6.77, + "learning_rate": 1.864508844561536e-05, + "loss": 0.1651, + "step": 18000 + }, + { + "epoch": 6.81, + "learning_rate": 1.863756115920211e-05, + "loss": 0.1589, + "step": 18100 + }, + { + "epoch": 6.85, + "learning_rate": 1.863003387278886e-05, + "loss": 0.1612, + "step": 18200 + }, + { + "epoch": 6.89, + "learning_rate": 1.8622506586375613e-05, + "loss": 0.1629, + "step": 18300 + }, + { + "epoch": 6.93, + "learning_rate": 1.8614979299962366e-05, + "loss": 0.1571, + "step": 18400 + }, + { + "epoch": 6.96, + "learning_rate": 1.8607452013549115e-05, + "loss": 0.158, + "step": 18500 + }, + { + "epoch": 7.0, + "eval_loss": 0.162311390042305, + "eval_runtime": 44.6199, + "eval_samples_per_second": 168.087, + "eval_steps_per_second": 10.511, + "step": 18599 + }, + { + "epoch": 7.0, + "learning_rate": 1.859992472713587e-05, + "loss": 0.158, + "step": 18600 + }, + { + "epoch": 7.04, + "learning_rate": 1.859239744072262e-05, + "loss": 0.1588, + "step": 18700 + }, + { + "epoch": 7.08, + "learning_rate": 1.8584870154309373e-05, + "loss": 0.1556, + "step": 18800 + }, + { + "epoch": 7.11, + "learning_rate": 1.8577342867896125e-05, + "loss": 0.1552, + "step": 18900 + }, + { + "epoch": 7.15, + "learning_rate": 1.8569815581482878e-05, + "loss": 0.1567, + "step": 19000 + }, + { + "epoch": 7.19, + "learning_rate": 1.8562288295069627e-05, + "loss": 0.1533, + "step": 19100 + }, + { + "epoch": 7.23, + "learning_rate": 1.8554761008656383e-05, + "loss": 0.1543, + "step": 19200 + }, + { + "epoch": 7.26, + "learning_rate": 1.854723372224313e-05, + "loss": 0.1545, + "step": 19300 + }, + { + "epoch": 7.3, + "learning_rate": 1.8539706435829884e-05, + "loss": 0.1557, + "step": 19400 + }, + { + "epoch": 7.34, + "learning_rate": 1.8532179149416637e-05, + "loss": 0.1524, + "step": 19500 + }, + { + "epoch": 7.38, + "learning_rate": 1.852465186300339e-05, + "loss": 0.1538, + "step": 19600 + }, + { + "epoch": 7.41, + "learning_rate": 1.851712457659014e-05, + "loss": 0.1533, + "step": 19700 + }, + { + "epoch": 7.45, + "learning_rate": 1.8509597290176894e-05, + "loss": 0.1506, + "step": 19800 + }, + { + "epoch": 7.49, + "learning_rate": 1.8502070003763647e-05, + "loss": 0.1499, + "step": 19900 + }, + { + "epoch": 7.53, + "learning_rate": 1.8494542717350396e-05, + "loss": 0.1514, + "step": 20000 + }, + { + "epoch": 7.56, + "learning_rate": 1.848701543093715e-05, + "loss": 0.1497, + "step": 20100 + }, + { + "epoch": 7.6, + "learning_rate": 1.84794881445239e-05, + "loss": 0.151, + "step": 20200 + }, + { + "epoch": 7.64, + "learning_rate": 1.8471960858110653e-05, + "loss": 0.1523, + "step": 20300 + }, + { + "epoch": 7.68, + "learning_rate": 1.8464433571697406e-05, + "loss": 0.1484, + "step": 20400 + }, + { + "epoch": 7.72, + "learning_rate": 1.845690628528416e-05, + "loss": 0.15, + "step": 20500 + }, + { + "epoch": 7.75, + "learning_rate": 1.8449378998870908e-05, + "loss": 0.1467, + "step": 20600 + }, + { + "epoch": 7.79, + "learning_rate": 1.844185171245766e-05, + "loss": 0.1467, + "step": 20700 + }, + { + "epoch": 7.83, + "learning_rate": 1.8434324426044413e-05, + "loss": 0.1469, + "step": 20800 + }, + { + "epoch": 7.87, + "learning_rate": 1.8426797139631165e-05, + "loss": 0.148, + "step": 20900 + }, + { + "epoch": 7.9, + "learning_rate": 1.8419269853217914e-05, + "loss": 0.1484, + "step": 21000 + }, + { + "epoch": 7.94, + "learning_rate": 1.841174256680467e-05, + "loss": 0.1459, + "step": 21100 + }, + { + "epoch": 7.98, + "learning_rate": 1.840421528039142e-05, + "loss": 0.1481, + "step": 21200 + }, + { + "epoch": 8.0, + "eval_loss": 0.14529532194137573, + "eval_runtime": 44.2737, + "eval_samples_per_second": 169.401, + "eval_steps_per_second": 10.593, + "step": 21256 + }, + { + "epoch": 8.02, + "learning_rate": 1.8396687993978172e-05, + "loss": 0.1446, + "step": 21300 + }, + { + "epoch": 8.05, + "learning_rate": 1.8389160707564924e-05, + "loss": 0.1465, + "step": 21400 + }, + { + "epoch": 8.09, + "learning_rate": 1.8381633421151677e-05, + "loss": 0.1458, + "step": 21500 + }, + { + "epoch": 8.13, + "learning_rate": 1.8374106134738426e-05, + "loss": 0.145, + "step": 21600 + }, + { + "epoch": 8.17, + "learning_rate": 1.8366578848325182e-05, + "loss": 0.1457, + "step": 21700 + }, + { + "epoch": 8.2, + "learning_rate": 1.835905156191193e-05, + "loss": 0.1422, + "step": 21800 + }, + { + "epoch": 8.24, + "learning_rate": 1.8351524275498683e-05, + "loss": 0.1435, + "step": 21900 + }, + { + "epoch": 8.28, + "learning_rate": 1.8343996989085436e-05, + "loss": 0.1456, + "step": 22000 + }, + { + "epoch": 8.32, + "learning_rate": 1.833646970267219e-05, + "loss": 0.1472, + "step": 22100 + }, + { + "epoch": 8.36, + "learning_rate": 1.8328942416258938e-05, + "loss": 0.1434, + "step": 22200 + }, + { + "epoch": 8.39, + "learning_rate": 1.8321415129845694e-05, + "loss": 0.1446, + "step": 22300 + }, + { + "epoch": 8.43, + "learning_rate": 1.8313887843432446e-05, + "loss": 0.1464, + "step": 22400 + }, + { + "epoch": 8.47, + "learning_rate": 1.8306360557019195e-05, + "loss": 0.1446, + "step": 22500 + }, + { + "epoch": 8.51, + "learning_rate": 1.8298833270605948e-05, + "loss": 0.1428, + "step": 22600 + }, + { + "epoch": 8.54, + "learning_rate": 1.82913059841927e-05, + "loss": 0.1437, + "step": 22700 + }, + { + "epoch": 8.58, + "learning_rate": 1.8283778697779453e-05, + "loss": 0.1412, + "step": 22800 + }, + { + "epoch": 8.62, + "learning_rate": 1.8276251411366205e-05, + "loss": 0.1396, + "step": 22900 + }, + { + "epoch": 8.66, + "learning_rate": 1.8268724124952958e-05, + "loss": 0.1426, + "step": 23000 + }, + { + "epoch": 8.69, + "learning_rate": 1.8261196838539707e-05, + "loss": 0.1421, + "step": 23100 + }, + { + "epoch": 8.73, + "learning_rate": 1.825366955212646e-05, + "loss": 0.1418, + "step": 23200 + }, + { + "epoch": 8.77, + "learning_rate": 1.8246142265713212e-05, + "loss": 0.1403, + "step": 23300 + }, + { + "epoch": 8.81, + "learning_rate": 1.8238614979299964e-05, + "loss": 0.1416, + "step": 23400 + }, + { + "epoch": 8.84, + "learning_rate": 1.8231087692886717e-05, + "loss": 0.1403, + "step": 23500 + }, + { + "epoch": 8.88, + "learning_rate": 1.822356040647347e-05, + "loss": 0.1396, + "step": 23600 + }, + { + "epoch": 8.92, + "learning_rate": 1.821603312006022e-05, + "loss": 0.1378, + "step": 23700 + }, + { + "epoch": 8.96, + "learning_rate": 1.820850583364697e-05, + "loss": 0.1392, + "step": 23800 + }, + { + "epoch": 9.0, + "learning_rate": 1.8200978547233724e-05, + "loss": 0.1391, + "step": 23900 + }, + { + "epoch": 9.0, + "eval_loss": 0.13683784008026123, + "eval_runtime": 44.0792, + "eval_samples_per_second": 170.148, + "eval_steps_per_second": 10.64, + "step": 23913 + }, + { + "epoch": 9.03, + "learning_rate": 1.8193451260820476e-05, + "loss": 0.1407, + "step": 24000 + }, + { + "epoch": 9.07, + "learning_rate": 1.8185923974407225e-05, + "loss": 0.1386, + "step": 24100 + }, + { + "epoch": 9.11, + "learning_rate": 1.817839668799398e-05, + "loss": 0.1385, + "step": 24200 + }, + { + "epoch": 9.15, + "learning_rate": 1.817086940158073e-05, + "loss": 0.1403, + "step": 24300 + }, + { + "epoch": 9.18, + "learning_rate": 1.8163342115167483e-05, + "loss": 0.1395, + "step": 24400 + }, + { + "epoch": 9.22, + "learning_rate": 1.8155814828754235e-05, + "loss": 0.1374, + "step": 24500 + }, + { + "epoch": 9.26, + "learning_rate": 1.8148287542340988e-05, + "loss": 0.1354, + "step": 24600 + }, + { + "epoch": 9.3, + "learning_rate": 1.8140760255927737e-05, + "loss": 0.1367, + "step": 24700 + }, + { + "epoch": 9.33, + "learning_rate": 1.8133232969514493e-05, + "loss": 0.1389, + "step": 24800 + }, + { + "epoch": 9.37, + "learning_rate": 1.8125705683101245e-05, + "loss": 0.1355, + "step": 24900 + }, + { + "epoch": 9.41, + "learning_rate": 1.8118178396687994e-05, + "loss": 0.1359, + "step": 25000 + }, + { + "epoch": 9.45, + "learning_rate": 1.8110651110274747e-05, + "loss": 0.1351, + "step": 25100 + }, + { + "epoch": 9.48, + "learning_rate": 1.81031238238615e-05, + "loss": 0.1381, + "step": 25200 + }, + { + "epoch": 9.52, + "learning_rate": 1.8095596537448252e-05, + "loss": 0.1364, + "step": 25300 + }, + { + "epoch": 9.56, + "learning_rate": 1.8088069251035005e-05, + "loss": 0.1339, + "step": 25400 + }, + { + "epoch": 9.6, + "learning_rate": 1.8080541964621757e-05, + "loss": 0.1346, + "step": 25500 + }, + { + "epoch": 9.63, + "learning_rate": 1.8073014678208506e-05, + "loss": 0.1338, + "step": 25600 + }, + { + "epoch": 9.67, + "learning_rate": 1.806548739179526e-05, + "loss": 0.1332, + "step": 25700 + }, + { + "epoch": 9.71, + "learning_rate": 1.805796010538201e-05, + "loss": 0.1344, + "step": 25800 + }, + { + "epoch": 9.75, + "learning_rate": 1.8050432818968764e-05, + "loss": 0.134, + "step": 25900 + }, + { + "epoch": 9.79, + "learning_rate": 1.8042905532555516e-05, + "loss": 0.1365, + "step": 26000 + }, + { + "epoch": 9.82, + "learning_rate": 1.803537824614227e-05, + "loss": 0.1354, + "step": 26100 + }, + { + "epoch": 9.86, + "learning_rate": 1.8027850959729018e-05, + "loss": 0.1327, + "step": 26200 + }, + { + "epoch": 9.9, + "learning_rate": 1.802032367331577e-05, + "loss": 0.1366, + "step": 26300 + }, + { + "epoch": 9.94, + "learning_rate": 1.8012796386902523e-05, + "loss": 0.1343, + "step": 26400 + }, + { + "epoch": 9.97, + "learning_rate": 1.8005269100489275e-05, + "loss": 0.1348, + "step": 26500 + }, + { + "epoch": 10.0, + "eval_loss": 0.13540224730968475, + "eval_runtime": 43.8422, + "eval_samples_per_second": 171.068, + "eval_steps_per_second": 10.697, + "step": 26570 + }, + { + "epoch": 10.01, + "learning_rate": 1.7997741814076028e-05, + "loss": 0.134, + "step": 26600 + }, + { + "epoch": 10.05, + "learning_rate": 1.799021452766278e-05, + "loss": 0.1327, + "step": 26700 + }, + { + "epoch": 10.09, + "learning_rate": 1.798268724124953e-05, + "loss": 0.1303, + "step": 26800 + }, + { + "epoch": 10.12, + "learning_rate": 1.7975159954836282e-05, + "loss": 0.1343, + "step": 26900 + }, + { + "epoch": 10.16, + "learning_rate": 1.7967632668423035e-05, + "loss": 0.1323, + "step": 27000 + }, + { + "epoch": 10.2, + "learning_rate": 1.7960105382009787e-05, + "loss": 0.1318, + "step": 27100 + }, + { + "epoch": 10.24, + "learning_rate": 1.7952578095596536e-05, + "loss": 0.1322, + "step": 27200 + }, + { + "epoch": 10.27, + "learning_rate": 1.7945050809183292e-05, + "loss": 0.1318, + "step": 27300 + }, + { + "epoch": 10.31, + "learning_rate": 1.7937523522770045e-05, + "loss": 0.1324, + "step": 27400 + }, + { + "epoch": 10.35, + "learning_rate": 1.7929996236356794e-05, + "loss": 0.1306, + "step": 27500 + }, + { + "epoch": 10.39, + "learning_rate": 1.7922468949943546e-05, + "loss": 0.1283, + "step": 27600 + }, + { + "epoch": 10.43, + "learning_rate": 1.79149416635303e-05, + "loss": 0.1311, + "step": 27700 + }, + { + "epoch": 10.46, + "learning_rate": 1.790741437711705e-05, + "loss": 0.1313, + "step": 27800 + }, + { + "epoch": 10.5, + "learning_rate": 1.7899887090703804e-05, + "loss": 0.1291, + "step": 27900 + }, + { + "epoch": 10.54, + "learning_rate": 1.7892359804290556e-05, + "loss": 0.1291, + "step": 28000 + }, + { + "epoch": 10.58, + "learning_rate": 1.7884832517877305e-05, + "loss": 0.1314, + "step": 28100 + }, + { + "epoch": 10.61, + "learning_rate": 1.7877305231464058e-05, + "loss": 0.1294, + "step": 28200 + }, + { + "epoch": 10.65, + "learning_rate": 1.786977794505081e-05, + "loss": 0.1308, + "step": 28300 + }, + { + "epoch": 10.69, + "learning_rate": 1.7862250658637563e-05, + "loss": 0.1317, + "step": 28400 + }, + { + "epoch": 10.73, + "learning_rate": 1.7854723372224315e-05, + "loss": 0.129, + "step": 28500 + }, + { + "epoch": 10.76, + "learning_rate": 1.7847196085811068e-05, + "loss": 0.1307, + "step": 28600 + }, + { + "epoch": 10.8, + "learning_rate": 1.7839668799397817e-05, + "loss": 0.1302, + "step": 28700 + }, + { + "epoch": 10.84, + "learning_rate": 1.783214151298457e-05, + "loss": 0.1283, + "step": 28800 + }, + { + "epoch": 10.88, + "learning_rate": 1.7824614226571322e-05, + "loss": 0.1277, + "step": 28900 + }, + { + "epoch": 10.91, + "learning_rate": 1.7817086940158075e-05, + "loss": 0.1287, + "step": 29000 + }, + { + "epoch": 10.95, + "learning_rate": 1.7809559653744827e-05, + "loss": 0.13, + "step": 29100 + }, + { + "epoch": 10.99, + "learning_rate": 1.780203236733158e-05, + "loss": 0.129, + "step": 29200 + }, + { + "epoch": 11.0, + "eval_loss": 0.12486864626407623, + "eval_runtime": 44.5752, + "eval_samples_per_second": 168.255, + "eval_steps_per_second": 10.522, + "step": 29227 + }, + { + "epoch": 11.03, + "learning_rate": 1.779450508091833e-05, + "loss": 0.1259, + "step": 29300 + }, + { + "epoch": 11.07, + "learning_rate": 1.778697779450508e-05, + "loss": 0.128, + "step": 29400 + }, + { + "epoch": 11.1, + "learning_rate": 1.7779450508091834e-05, + "loss": 0.127, + "step": 29500 + }, + { + "epoch": 11.14, + "learning_rate": 1.7771923221678586e-05, + "loss": 0.1277, + "step": 29600 + }, + { + "epoch": 11.18, + "learning_rate": 1.776439593526534e-05, + "loss": 0.1271, + "step": 29700 + }, + { + "epoch": 11.22, + "learning_rate": 1.775686864885209e-05, + "loss": 0.1263, + "step": 29800 + }, + { + "epoch": 11.25, + "learning_rate": 1.7749341362438844e-05, + "loss": 0.1259, + "step": 29900 + }, + { + "epoch": 11.29, + "learning_rate": 1.7741814076025593e-05, + "loss": 0.1257, + "step": 30000 + }, + { + "epoch": 11.33, + "learning_rate": 1.7734286789612346e-05, + "loss": 0.125, + "step": 30100 + }, + { + "epoch": 11.37, + "learning_rate": 1.7726759503199098e-05, + "loss": 0.128, + "step": 30200 + }, + { + "epoch": 11.4, + "learning_rate": 1.771923221678585e-05, + "loss": 0.1271, + "step": 30300 + }, + { + "epoch": 11.44, + "learning_rate": 1.7711704930372603e-05, + "loss": 0.1268, + "step": 30400 + }, + { + "epoch": 11.48, + "learning_rate": 1.7704177643959356e-05, + "loss": 0.1262, + "step": 30500 + }, + { + "epoch": 11.52, + "learning_rate": 1.7696650357546105e-05, + "loss": 0.1247, + "step": 30600 + }, + { + "epoch": 11.55, + "learning_rate": 1.7689123071132857e-05, + "loss": 0.1243, + "step": 30700 + }, + { + "epoch": 11.59, + "learning_rate": 1.768159578471961e-05, + "loss": 0.1249, + "step": 30800 + }, + { + "epoch": 11.63, + "learning_rate": 1.7674068498306362e-05, + "loss": 0.1255, + "step": 30900 + }, + { + "epoch": 11.67, + "learning_rate": 1.7666541211893115e-05, + "loss": 0.1258, + "step": 31000 + }, + { + "epoch": 11.7, + "learning_rate": 1.7659013925479867e-05, + "loss": 0.1234, + "step": 31100 + }, + { + "epoch": 11.74, + "learning_rate": 1.7651486639066616e-05, + "loss": 0.1226, + "step": 31200 + }, + { + "epoch": 11.78, + "learning_rate": 1.764395935265337e-05, + "loss": 0.1251, + "step": 31300 + }, + { + "epoch": 11.82, + "learning_rate": 1.763643206624012e-05, + "loss": 0.1238, + "step": 31400 + }, + { + "epoch": 11.86, + "learning_rate": 1.7628904779826874e-05, + "loss": 0.1262, + "step": 31500 + }, + { + "epoch": 11.89, + "learning_rate": 1.7621377493413626e-05, + "loss": 0.1249, + "step": 31600 + }, + { + "epoch": 11.93, + "learning_rate": 1.761385020700038e-05, + "loss": 0.1224, + "step": 31700 + }, + { + "epoch": 11.97, + "learning_rate": 1.7606322920587128e-05, + "loss": 0.126, + "step": 31800 + }, + { + "epoch": 12.0, + "eval_loss": 0.12289831042289734, + "eval_runtime": 44.279, + "eval_samples_per_second": 169.381, + "eval_steps_per_second": 10.592, + "step": 31884 + }, + { + "epoch": 12.01, + "learning_rate": 1.759879563417388e-05, + "loss": 0.1247, + "step": 31900 + }, + { + "epoch": 12.04, + "learning_rate": 1.7591268347760633e-05, + "loss": 0.1224, + "step": 32000 + }, + { + "epoch": 12.08, + "learning_rate": 1.7583741061347386e-05, + "loss": 0.1225, + "step": 32100 + }, + { + "epoch": 12.12, + "learning_rate": 1.7576213774934138e-05, + "loss": 0.1247, + "step": 32200 + }, + { + "epoch": 12.16, + "learning_rate": 1.756868648852089e-05, + "loss": 0.1244, + "step": 32300 + }, + { + "epoch": 12.19, + "learning_rate": 1.7561159202107643e-05, + "loss": 0.1231, + "step": 32400 + }, + { + "epoch": 12.23, + "learning_rate": 1.7553631915694392e-05, + "loss": 0.1215, + "step": 32500 + }, + { + "epoch": 12.27, + "learning_rate": 1.7546104629281148e-05, + "loss": 0.1238, + "step": 32600 + }, + { + "epoch": 12.31, + "learning_rate": 1.7538577342867897e-05, + "loss": 0.1251, + "step": 32700 + }, + { + "epoch": 12.34, + "learning_rate": 1.753105005645465e-05, + "loss": 0.1248, + "step": 32800 + }, + { + "epoch": 12.38, + "learning_rate": 1.7523522770041402e-05, + "loss": 0.1227, + "step": 32900 + }, + { + "epoch": 12.42, + "learning_rate": 1.7515995483628155e-05, + "loss": 0.1219, + "step": 33000 + }, + { + "epoch": 12.46, + "learning_rate": 1.7508468197214904e-05, + "loss": 0.121, + "step": 33100 + }, + { + "epoch": 12.5, + "learning_rate": 1.7500940910801657e-05, + "loss": 0.1225, + "step": 33200 + }, + { + "epoch": 12.53, + "learning_rate": 1.749341362438841e-05, + "loss": 0.1224, + "step": 33300 + }, + { + "epoch": 12.57, + "learning_rate": 1.748588633797516e-05, + "loss": 0.1214, + "step": 33400 + }, + { + "epoch": 12.61, + "learning_rate": 1.7478359051561914e-05, + "loss": 0.1217, + "step": 33500 + }, + { + "epoch": 12.65, + "learning_rate": 1.7470831765148667e-05, + "loss": 0.1201, + "step": 33600 + }, + { + "epoch": 12.68, + "learning_rate": 1.7463304478735416e-05, + "loss": 0.1212, + "step": 33700 + }, + { + "epoch": 12.72, + "learning_rate": 1.7455777192322168e-05, + "loss": 0.1218, + "step": 33800 + }, + { + "epoch": 12.76, + "learning_rate": 1.744824990590892e-05, + "loss": 0.1221, + "step": 33900 + }, + { + "epoch": 12.8, + "learning_rate": 1.7440722619495673e-05, + "loss": 0.1199, + "step": 34000 + }, + { + "epoch": 12.83, + "learning_rate": 1.7433195333082426e-05, + "loss": 0.122, + "step": 34100 + }, + { + "epoch": 12.87, + "learning_rate": 1.7425668046669178e-05, + "loss": 0.12, + "step": 34200 + }, + { + "epoch": 12.91, + "learning_rate": 1.7418140760255927e-05, + "loss": 0.1222, + "step": 34300 + }, + { + "epoch": 12.95, + "learning_rate": 1.741061347384268e-05, + "loss": 0.1228, + "step": 34400 + }, + { + "epoch": 12.98, + "learning_rate": 1.7403086187429432e-05, + "loss": 0.1216, + "step": 34500 + }, + { + "epoch": 13.0, + "eval_loss": 0.11841125041246414, + "eval_runtime": 44.6411, + "eval_samples_per_second": 168.006, + "eval_steps_per_second": 10.506, + "step": 34541 + }, + { + "epoch": 13.02, + "learning_rate": 1.7395558901016185e-05, + "loss": 0.1195, + "step": 34600 + }, + { + "epoch": 13.06, + "learning_rate": 1.7388031614602937e-05, + "loss": 0.1211, + "step": 34700 + }, + { + "epoch": 13.1, + "learning_rate": 1.738050432818969e-05, + "loss": 0.1209, + "step": 34800 + }, + { + "epoch": 13.14, + "learning_rate": 1.7372977041776442e-05, + "loss": 0.122, + "step": 34900 + }, + { + "epoch": 13.17, + "learning_rate": 1.736544975536319e-05, + "loss": 0.1206, + "step": 35000 + }, + { + "epoch": 13.21, + "learning_rate": 1.7357922468949947e-05, + "loss": 0.1198, + "step": 35100 + }, + { + "epoch": 13.25, + "learning_rate": 1.7350395182536697e-05, + "loss": 0.1196, + "step": 35200 + }, + { + "epoch": 13.29, + "learning_rate": 1.734286789612345e-05, + "loss": 0.1192, + "step": 35300 + }, + { + "epoch": 13.32, + "learning_rate": 1.73353406097102e-05, + "loss": 0.12, + "step": 35400 + }, + { + "epoch": 13.36, + "learning_rate": 1.7327813323296954e-05, + "loss": 0.1179, + "step": 35500 + }, + { + "epoch": 13.4, + "learning_rate": 1.7320286036883703e-05, + "loss": 0.1201, + "step": 35600 + }, + { + "epoch": 13.44, + "learning_rate": 1.731275875047046e-05, + "loss": 0.1181, + "step": 35700 + }, + { + "epoch": 13.47, + "learning_rate": 1.730523146405721e-05, + "loss": 0.1185, + "step": 35800 + }, + { + "epoch": 13.51, + "learning_rate": 1.729770417764396e-05, + "loss": 0.1171, + "step": 35900 + }, + { + "epoch": 13.55, + "learning_rate": 1.7290176891230713e-05, + "loss": 0.1202, + "step": 36000 + }, + { + "epoch": 13.59, + "learning_rate": 1.7282649604817466e-05, + "loss": 0.1191, + "step": 36100 + }, + { + "epoch": 13.62, + "learning_rate": 1.7275122318404215e-05, + "loss": 0.1201, + "step": 36200 + }, + { + "epoch": 13.66, + "learning_rate": 1.7267595031990967e-05, + "loss": 0.1188, + "step": 36300 + }, + { + "epoch": 13.7, + "learning_rate": 1.726006774557772e-05, + "loss": 0.1191, + "step": 36400 + }, + { + "epoch": 13.74, + "learning_rate": 1.7252540459164473e-05, + "loss": 0.117, + "step": 36500 + }, + { + "epoch": 13.77, + "learning_rate": 1.7245013172751225e-05, + "loss": 0.1177, + "step": 36600 + }, + { + "epoch": 13.81, + "learning_rate": 1.7237485886337978e-05, + "loss": 0.1173, + "step": 36700 + }, + { + "epoch": 13.85, + "learning_rate": 1.7229958599924727e-05, + "loss": 0.1176, + "step": 36800 + }, + { + "epoch": 13.89, + "learning_rate": 1.722243131351148e-05, + "loss": 0.1177, + "step": 36900 + }, + { + "epoch": 13.93, + "learning_rate": 1.7214904027098232e-05, + "loss": 0.1168, + "step": 37000 + }, + { + "epoch": 13.96, + "learning_rate": 1.7207376740684984e-05, + "loss": 0.1175, + "step": 37100 + }, + { + "epoch": 14.0, + "eval_loss": 0.1184767335653305, + "eval_runtime": 44.9958, + "eval_samples_per_second": 166.682, + "eval_steps_per_second": 10.423, + "step": 37198 + }, + { + "epoch": 14.0, + "learning_rate": 1.7199849454271737e-05, + "loss": 0.1172, + "step": 37200 + }, + { + "epoch": 14.04, + "learning_rate": 1.719232216785849e-05, + "loss": 0.117, + "step": 37300 + }, + { + "epoch": 14.08, + "learning_rate": 1.7184794881445242e-05, + "loss": 0.1175, + "step": 37400 + }, + { + "epoch": 14.11, + "learning_rate": 1.717726759503199e-05, + "loss": 0.1164, + "step": 37500 + }, + { + "epoch": 14.15, + "learning_rate": 1.7169740308618747e-05, + "loss": 0.1161, + "step": 37600 + }, + { + "epoch": 14.19, + "learning_rate": 1.7162213022205496e-05, + "loss": 0.1183, + "step": 37700 + }, + { + "epoch": 14.23, + "learning_rate": 1.715468573579225e-05, + "loss": 0.1161, + "step": 37800 + }, + { + "epoch": 14.26, + "learning_rate": 1.7147158449379e-05, + "loss": 0.117, + "step": 37900 + }, + { + "epoch": 14.3, + "learning_rate": 1.7139631162965753e-05, + "loss": 0.1169, + "step": 38000 + }, + { + "epoch": 14.34, + "learning_rate": 1.7132103876552503e-05, + "loss": 0.1171, + "step": 38100 + }, + { + "epoch": 14.38, + "learning_rate": 1.712457659013926e-05, + "loss": 0.1165, + "step": 38200 + }, + { + "epoch": 14.41, + "learning_rate": 1.7117049303726008e-05, + "loss": 0.115, + "step": 38300 + }, + { + "epoch": 14.45, + "learning_rate": 1.710952201731276e-05, + "loss": 0.1164, + "step": 38400 + }, + { + "epoch": 14.49, + "learning_rate": 1.7101994730899513e-05, + "loss": 0.1151, + "step": 38500 + }, + { + "epoch": 14.53, + "learning_rate": 1.7094467444486265e-05, + "loss": 0.1163, + "step": 38600 + }, + { + "epoch": 14.57, + "learning_rate": 1.7086940158073014e-05, + "loss": 0.1157, + "step": 38700 + }, + { + "epoch": 14.6, + "learning_rate": 1.7079412871659767e-05, + "loss": 0.1154, + "step": 38800 + }, + { + "epoch": 14.64, + "learning_rate": 1.707188558524652e-05, + "loss": 0.1145, + "step": 38900 + }, + { + "epoch": 14.68, + "learning_rate": 1.7064358298833272e-05, + "loss": 0.1153, + "step": 39000 + }, + { + "epoch": 14.72, + "learning_rate": 1.7056831012420024e-05, + "loss": 0.1163, + "step": 39100 + }, + { + "epoch": 14.75, + "learning_rate": 1.7049303726006777e-05, + "loss": 0.1173, + "step": 39200 + }, + { + "epoch": 14.79, + "learning_rate": 1.7041776439593526e-05, + "loss": 0.1161, + "step": 39300 + }, + { + "epoch": 14.83, + "learning_rate": 1.703424915318028e-05, + "loss": 0.1144, + "step": 39400 + }, + { + "epoch": 14.87, + "learning_rate": 1.702672186676703e-05, + "loss": 0.1152, + "step": 39500 + }, + { + "epoch": 14.9, + "learning_rate": 1.7019194580353783e-05, + "loss": 0.1129, + "step": 39600 + }, + { + "epoch": 14.94, + "learning_rate": 1.7011667293940536e-05, + "loss": 0.1159, + "step": 39700 + }, + { + "epoch": 14.98, + "learning_rate": 1.700414000752729e-05, + "loss": 0.1137, + "step": 39800 + }, + { + "epoch": 15.0, + "eval_loss": 0.11463519930839539, + "eval_runtime": 44.7206, + "eval_samples_per_second": 167.708, + "eval_steps_per_second": 10.487, + "step": 39855 + }, + { + "epoch": 15.02, + "learning_rate": 1.699661272111404e-05, + "loss": 0.1136, + "step": 39900 + }, + { + "epoch": 15.05, + "learning_rate": 1.698908543470079e-05, + "loss": 0.1154, + "step": 40000 + }, + { + "epoch": 15.09, + "learning_rate": 1.6981558148287546e-05, + "loss": 0.1119, + "step": 40100 + }, + { + "epoch": 15.13, + "learning_rate": 1.6974030861874295e-05, + "loss": 0.1147, + "step": 40200 + }, + { + "epoch": 15.17, + "learning_rate": 1.6966503575461048e-05, + "loss": 0.1133, + "step": 40300 + }, + { + "epoch": 15.21, + "learning_rate": 1.69589762890478e-05, + "loss": 0.1159, + "step": 40400 + }, + { + "epoch": 15.24, + "learning_rate": 1.6951449002634553e-05, + "loss": 0.1123, + "step": 40500 + }, + { + "epoch": 15.28, + "learning_rate": 1.6943921716221302e-05, + "loss": 0.1144, + "step": 40600 + }, + { + "epoch": 15.32, + "learning_rate": 1.6936394429808058e-05, + "loss": 0.1156, + "step": 40700 + }, + { + "epoch": 15.36, + "learning_rate": 1.6928867143394807e-05, + "loss": 0.115, + "step": 40800 + }, + { + "epoch": 15.39, + "learning_rate": 1.692133985698156e-05, + "loss": 0.1129, + "step": 40900 + }, + { + "epoch": 15.43, + "learning_rate": 1.6913812570568312e-05, + "loss": 0.1136, + "step": 41000 + }, + { + "epoch": 15.47, + "learning_rate": 1.6906285284155064e-05, + "loss": 0.1127, + "step": 41100 + }, + { + "epoch": 15.51, + "learning_rate": 1.6898757997741814e-05, + "loss": 0.1119, + "step": 41200 + }, + { + "epoch": 15.54, + "learning_rate": 1.689123071132857e-05, + "loss": 0.1131, + "step": 41300 + }, + { + "epoch": 15.58, + "learning_rate": 1.688370342491532e-05, + "loss": 0.1112, + "step": 41400 + }, + { + "epoch": 15.62, + "learning_rate": 1.687617613850207e-05, + "loss": 0.1149, + "step": 41500 + }, + { + "epoch": 15.66, + "learning_rate": 1.6868648852088824e-05, + "loss": 0.1133, + "step": 41600 + }, + { + "epoch": 15.69, + "learning_rate": 1.6861121565675576e-05, + "loss": 0.1123, + "step": 41700 + }, + { + "epoch": 15.73, + "learning_rate": 1.6853594279262325e-05, + "loss": 0.1138, + "step": 41800 + }, + { + "epoch": 15.77, + "learning_rate": 1.6846066992849078e-05, + "loss": 0.1134, + "step": 41900 + }, + { + "epoch": 15.81, + "learning_rate": 1.683853970643583e-05, + "loss": 0.1138, + "step": 42000 + }, + { + "epoch": 15.84, + "learning_rate": 1.6831012420022583e-05, + "loss": 0.1122, + "step": 42100 + }, + { + "epoch": 15.88, + "learning_rate": 1.6823485133609335e-05, + "loss": 0.1127, + "step": 42200 + }, + { + "epoch": 15.92, + "learning_rate": 1.6815957847196088e-05, + "loss": 0.1147, + "step": 42300 + }, + { + "epoch": 15.96, + "learning_rate": 1.680843056078284e-05, + "loss": 0.1127, + "step": 42400 + }, + { + "epoch": 16.0, + "learning_rate": 1.680090327436959e-05, + "loss": 0.1125, + "step": 42500 + }, + { + "epoch": 16.0, + "eval_loss": 0.1117386743426323, + "eval_runtime": 44.2589, + "eval_samples_per_second": 169.457, + "eval_steps_per_second": 10.597, + "step": 42512 + }, + { + "epoch": 16.03, + "learning_rate": 1.6793375987956345e-05, + "loss": 0.1115, + "step": 42600 + }, + { + "epoch": 16.07, + "learning_rate": 1.6785848701543094e-05, + "loss": 0.1128, + "step": 42700 + }, + { + "epoch": 16.11, + "learning_rate": 1.6778321415129847e-05, + "loss": 0.1115, + "step": 42800 + }, + { + "epoch": 16.15, + "learning_rate": 1.67707941287166e-05, + "loss": 0.1119, + "step": 42900 + }, + { + "epoch": 16.18, + "learning_rate": 1.6763266842303352e-05, + "loss": 0.1126, + "step": 43000 + }, + { + "epoch": 16.22, + "learning_rate": 1.67557395558901e-05, + "loss": 0.1127, + "step": 43100 + }, + { + "epoch": 16.26, + "learning_rate": 1.6748212269476857e-05, + "loss": 0.1145, + "step": 43200 + }, + { + "epoch": 16.3, + "learning_rate": 1.6740684983063606e-05, + "loss": 0.1118, + "step": 43300 + }, + { + "epoch": 16.33, + "learning_rate": 1.673315769665036e-05, + "loss": 0.1129, + "step": 43400 + }, + { + "epoch": 16.37, + "learning_rate": 1.672563041023711e-05, + "loss": 0.113, + "step": 43500 + }, + { + "epoch": 16.41, + "learning_rate": 1.6718103123823864e-05, + "loss": 0.1129, + "step": 43600 + }, + { + "epoch": 16.45, + "learning_rate": 1.6710575837410613e-05, + "loss": 0.1098, + "step": 43700 + }, + { + "epoch": 16.48, + "learning_rate": 1.670304855099737e-05, + "loss": 0.112, + "step": 43800 + }, + { + "epoch": 16.52, + "learning_rate": 1.6695521264584118e-05, + "loss": 0.1117, + "step": 43900 + }, + { + "epoch": 16.56, + "learning_rate": 1.668799397817087e-05, + "loss": 0.1128, + "step": 44000 + }, + { + "epoch": 16.6, + "learning_rate": 1.6680466691757623e-05, + "loss": 0.1133, + "step": 44100 + }, + { + "epoch": 16.64, + "learning_rate": 1.6672939405344375e-05, + "loss": 0.11, + "step": 44200 + }, + { + "epoch": 16.67, + "learning_rate": 1.6665412118931125e-05, + "loss": 0.1138, + "step": 44300 + }, + { + "epoch": 16.71, + "learning_rate": 1.665788483251788e-05, + "loss": 0.1108, + "step": 44400 + }, + { + "epoch": 16.75, + "learning_rate": 1.665035754610463e-05, + "loss": 0.1096, + "step": 44500 + }, + { + "epoch": 16.79, + "learning_rate": 1.6642830259691382e-05, + "loss": 0.1092, + "step": 44600 + }, + { + "epoch": 16.82, + "learning_rate": 1.6635302973278135e-05, + "loss": 0.1106, + "step": 44700 + }, + { + "epoch": 16.86, + "learning_rate": 1.6627775686864887e-05, + "loss": 0.1118, + "step": 44800 + }, + { + "epoch": 16.9, + "learning_rate": 1.662024840045164e-05, + "loss": 0.1116, + "step": 44900 + }, + { + "epoch": 16.94, + "learning_rate": 1.661272111403839e-05, + "loss": 0.1095, + "step": 45000 + }, + { + "epoch": 16.97, + "learning_rate": 1.6605193827625145e-05, + "loss": 0.1112, + "step": 45100 + }, + { + "epoch": 17.0, + "eval_loss": 0.10999125987291336, + "eval_runtime": 44.3431, + "eval_samples_per_second": 169.136, + "eval_steps_per_second": 10.577, + "step": 45169 + }, + { + "epoch": 17.01, + "learning_rate": 1.6597666541211894e-05, + "loss": 0.11, + "step": 45200 + }, + { + "epoch": 17.05, + "learning_rate": 1.6590139254798646e-05, + "loss": 0.1107, + "step": 45300 + }, + { + "epoch": 17.09, + "learning_rate": 1.65826119683854e-05, + "loss": 0.1127, + "step": 45400 + }, + { + "epoch": 17.12, + "learning_rate": 1.657508468197215e-05, + "loss": 0.1106, + "step": 45500 + }, + { + "epoch": 17.16, + "learning_rate": 1.65675573955589e-05, + "loss": 0.1098, + "step": 45600 + }, + { + "epoch": 17.2, + "learning_rate": 1.6560030109145656e-05, + "loss": 0.111, + "step": 45700 + }, + { + "epoch": 17.24, + "learning_rate": 1.6552502822732405e-05, + "loss": 0.1094, + "step": 45800 + }, + { + "epoch": 17.28, + "learning_rate": 1.6544975536319158e-05, + "loss": 0.1098, + "step": 45900 + }, + { + "epoch": 17.31, + "learning_rate": 1.653744824990591e-05, + "loss": 0.11, + "step": 46000 + }, + { + "epoch": 17.35, + "learning_rate": 1.6529920963492663e-05, + "loss": 0.1101, + "step": 46100 + }, + { + "epoch": 17.39, + "learning_rate": 1.6522393677079412e-05, + "loss": 0.1113, + "step": 46200 + }, + { + "epoch": 17.43, + "learning_rate": 1.6514866390666168e-05, + "loss": 0.112, + "step": 46300 + }, + { + "epoch": 17.46, + "learning_rate": 1.6507339104252917e-05, + "loss": 0.1109, + "step": 46400 + }, + { + "epoch": 17.5, + "learning_rate": 1.649981181783967e-05, + "loss": 0.1108, + "step": 46500 + }, + { + "epoch": 17.54, + "learning_rate": 1.6492284531426422e-05, + "loss": 0.1092, + "step": 46600 + }, + { + "epoch": 17.58, + "learning_rate": 1.6484757245013175e-05, + "loss": 0.1097, + "step": 46700 + }, + { + "epoch": 17.61, + "learning_rate": 1.6477229958599924e-05, + "loss": 0.1098, + "step": 46800 + }, + { + "epoch": 17.65, + "learning_rate": 1.646970267218668e-05, + "loss": 0.1081, + "step": 46900 + }, + { + "epoch": 17.69, + "learning_rate": 1.646217538577343e-05, + "loss": 0.1085, + "step": 47000 + }, + { + "epoch": 17.73, + "learning_rate": 1.645464809936018e-05, + "loss": 0.1089, + "step": 47100 + }, + { + "epoch": 17.76, + "learning_rate": 1.6447120812946934e-05, + "loss": 0.1084, + "step": 47200 + }, + { + "epoch": 17.8, + "learning_rate": 1.6439593526533686e-05, + "loss": 0.1093, + "step": 47300 + }, + { + "epoch": 17.84, + "learning_rate": 1.643206624012044e-05, + "loss": 0.1115, + "step": 47400 + }, + { + "epoch": 17.88, + "learning_rate": 1.642453895370719e-05, + "loss": 0.1091, + "step": 47500 + }, + { + "epoch": 17.91, + "learning_rate": 1.6417011667293944e-05, + "loss": 0.11, + "step": 47600 + }, + { + "epoch": 17.95, + "learning_rate": 1.6409484380880693e-05, + "loss": 0.1096, + "step": 47700 + }, + { + "epoch": 17.99, + "learning_rate": 1.6401957094467446e-05, + "loss": 0.1108, + "step": 47800 + }, + { + "epoch": 18.0, + "eval_loss": 0.10891053080558777, + "eval_runtime": 44.4735, + "eval_samples_per_second": 168.64, + "eval_steps_per_second": 10.546, + "step": 47826 + }, + { + "epoch": 18.03, + "learning_rate": 1.6394429808054198e-05, + "loss": 0.1076, + "step": 47900 + }, + { + "epoch": 18.07, + "learning_rate": 1.638690252164095e-05, + "loss": 0.1092, + "step": 48000 + }, + { + "epoch": 18.1, + "learning_rate": 1.63793752352277e-05, + "loss": 0.1081, + "step": 48100 + }, + { + "epoch": 18.14, + "learning_rate": 1.6371847948814456e-05, + "loss": 0.1089, + "step": 48200 + }, + { + "epoch": 18.18, + "learning_rate": 1.6364320662401205e-05, + "loss": 0.1097, + "step": 48300 + }, + { + "epoch": 18.22, + "learning_rate": 1.6356793375987957e-05, + "loss": 0.11, + "step": 48400 + }, + { + "epoch": 18.25, + "learning_rate": 1.634926608957471e-05, + "loss": 0.107, + "step": 48500 + }, + { + "epoch": 18.29, + "learning_rate": 1.6341738803161462e-05, + "loss": 0.109, + "step": 48600 + }, + { + "epoch": 18.33, + "learning_rate": 1.633421151674821e-05, + "loss": 0.1092, + "step": 48700 + }, + { + "epoch": 18.37, + "learning_rate": 1.6326684230334967e-05, + "loss": 0.109, + "step": 48800 + }, + { + "epoch": 18.4, + "learning_rate": 1.6319156943921716e-05, + "loss": 0.1068, + "step": 48900 + }, + { + "epoch": 18.44, + "learning_rate": 1.631162965750847e-05, + "loss": 0.1091, + "step": 49000 + }, + { + "epoch": 18.48, + "learning_rate": 1.630410237109522e-05, + "loss": 0.1087, + "step": 49100 + }, + { + "epoch": 18.52, + "learning_rate": 1.6296575084681974e-05, + "loss": 0.1078, + "step": 49200 + }, + { + "epoch": 18.55, + "learning_rate": 1.6289047798268723e-05, + "loss": 0.109, + "step": 49300 + }, + { + "epoch": 18.59, + "learning_rate": 1.628152051185548e-05, + "loss": 0.1112, + "step": 49400 + }, + { + "epoch": 18.63, + "learning_rate": 1.6273993225442228e-05, + "loss": 0.1098, + "step": 49500 + }, + { + "epoch": 18.67, + "learning_rate": 1.626646593902898e-05, + "loss": 0.11, + "step": 49600 + }, + { + "epoch": 18.71, + "learning_rate": 1.6258938652615733e-05, + "loss": 0.1085, + "step": 49700 + }, + { + "epoch": 18.74, + "learning_rate": 1.6251411366202486e-05, + "loss": 0.1088, + "step": 49800 + }, + { + "epoch": 18.78, + "learning_rate": 1.6243884079789238e-05, + "loss": 0.1093, + "step": 49900 + }, + { + "epoch": 18.82, + "learning_rate": 1.623635679337599e-05, + "loss": 0.1069, + "step": 50000 + }, + { + "epoch": 18.86, + "learning_rate": 1.6228829506962743e-05, + "loss": 0.1082, + "step": 50100 + }, + { + "epoch": 18.89, + "learning_rate": 1.6221302220549492e-05, + "loss": 0.1093, + "step": 50200 + }, + { + "epoch": 18.93, + "learning_rate": 1.6213774934136245e-05, + "loss": 0.1074, + "step": 50300 + }, + { + "epoch": 18.97, + "learning_rate": 1.6206247647722997e-05, + "loss": 0.1061, + "step": 50400 + }, + { + "epoch": 19.0, + "eval_loss": 0.10703522711992264, + "eval_runtime": 45.2943, + "eval_samples_per_second": 165.584, + "eval_steps_per_second": 10.354, + "step": 50483 + }, + { + "epoch": 19.01, + "learning_rate": 1.619872036130975e-05, + "loss": 0.1082, + "step": 50500 + }, + { + "epoch": 19.04, + "learning_rate": 1.6191193074896502e-05, + "loss": 0.1093, + "step": 50600 + }, + { + "epoch": 19.08, + "learning_rate": 1.6183665788483255e-05, + "loss": 0.1078, + "step": 50700 + }, + { + "epoch": 19.12, + "learning_rate": 1.6176138502070004e-05, + "loss": 0.1069, + "step": 50800 + }, + { + "epoch": 19.16, + "learning_rate": 1.6168611215656757e-05, + "loss": 0.1092, + "step": 50900 + }, + { + "epoch": 19.19, + "learning_rate": 1.616108392924351e-05, + "loss": 0.1064, + "step": 51000 + }, + { + "epoch": 19.23, + "learning_rate": 1.615355664283026e-05, + "loss": 0.1063, + "step": 51100 + }, + { + "epoch": 19.27, + "learning_rate": 1.614602935641701e-05, + "loss": 0.1071, + "step": 51200 + }, + { + "epoch": 19.31, + "learning_rate": 1.6138502070003767e-05, + "loss": 0.1083, + "step": 51300 + }, + { + "epoch": 19.35, + "learning_rate": 1.6130974783590516e-05, + "loss": 0.1079, + "step": 51400 + }, + { + "epoch": 19.38, + "learning_rate": 1.6123447497177268e-05, + "loss": 0.1081, + "step": 51500 + }, + { + "epoch": 19.42, + "learning_rate": 1.611592021076402e-05, + "loss": 0.1086, + "step": 51600 + }, + { + "epoch": 19.46, + "learning_rate": 1.6108392924350773e-05, + "loss": 0.1073, + "step": 51700 + }, + { + "epoch": 19.5, + "learning_rate": 1.6100865637937522e-05, + "loss": 0.1082, + "step": 51800 + }, + { + "epoch": 19.53, + "learning_rate": 1.6093338351524278e-05, + "loss": 0.1081, + "step": 51900 + }, + { + "epoch": 19.57, + "learning_rate": 1.6085811065111027e-05, + "loss": 0.1081, + "step": 52000 + }, + { + "epoch": 19.61, + "learning_rate": 1.607828377869778e-05, + "loss": 0.1077, + "step": 52100 + }, + { + "epoch": 19.65, + "learning_rate": 1.6070756492284532e-05, + "loss": 0.108, + "step": 52200 + }, + { + "epoch": 19.68, + "learning_rate": 1.6063229205871285e-05, + "loss": 0.107, + "step": 52300 + }, + { + "epoch": 19.72, + "learning_rate": 1.6055701919458037e-05, + "loss": 0.1082, + "step": 52400 + }, + { + "epoch": 19.76, + "learning_rate": 1.604817463304479e-05, + "loss": 0.1082, + "step": 52500 + }, + { + "epoch": 19.8, + "learning_rate": 1.6040647346631542e-05, + "loss": 0.1067, + "step": 52600 + }, + { + "epoch": 19.83, + "learning_rate": 1.603312006021829e-05, + "loss": 0.1071, + "step": 52700 + }, + { + "epoch": 19.87, + "learning_rate": 1.6025592773805044e-05, + "loss": 0.1068, + "step": 52800 + }, + { + "epoch": 19.91, + "learning_rate": 1.6018065487391797e-05, + "loss": 0.1086, + "step": 52900 + }, + { + "epoch": 19.95, + "learning_rate": 1.601053820097855e-05, + "loss": 0.1085, + "step": 53000 + }, + { + "epoch": 19.98, + "learning_rate": 1.60030109145653e-05, + "loss": 0.1073, + "step": 53100 + }, + { + "epoch": 20.0, + "eval_loss": 0.10757213830947876, + "eval_runtime": 45.7115, + "eval_samples_per_second": 164.072, + "eval_steps_per_second": 10.26, + "step": 53140 + }, + { + "epoch": 20.02, + "learning_rate": 1.5995483628152054e-05, + "loss": 0.1084, + "step": 53200 + }, + { + "epoch": 20.06, + "learning_rate": 1.5987956341738803e-05, + "loss": 0.1089, + "step": 53300 + }, + { + "epoch": 20.1, + "learning_rate": 1.5980429055325556e-05, + "loss": 0.1085, + "step": 53400 + }, + { + "epoch": 20.14, + "learning_rate": 1.597290176891231e-05, + "loss": 0.1092, + "step": 53500 + }, + { + "epoch": 20.17, + "learning_rate": 1.596537448249906e-05, + "loss": 0.1073, + "step": 53600 + }, + { + "epoch": 20.21, + "learning_rate": 1.5957847196085813e-05, + "loss": 0.1087, + "step": 53700 + }, + { + "epoch": 20.25, + "learning_rate": 1.5950319909672566e-05, + "loss": 0.1071, + "step": 53800 + }, + { + "epoch": 20.29, + "learning_rate": 1.5942792623259315e-05, + "loss": 0.1061, + "step": 53900 + }, + { + "epoch": 20.32, + "learning_rate": 1.5935265336846068e-05, + "loss": 0.1055, + "step": 54000 + }, + { + "epoch": 20.36, + "learning_rate": 1.592773805043282e-05, + "loss": 0.1077, + "step": 54100 + }, + { + "epoch": 20.4, + "learning_rate": 1.5920210764019573e-05, + "loss": 0.108, + "step": 54200 + }, + { + "epoch": 20.44, + "learning_rate": 1.591268347760632e-05, + "loss": 0.1075, + "step": 54300 + }, + { + "epoch": 20.47, + "learning_rate": 1.5905156191193078e-05, + "loss": 0.1066, + "step": 54400 + }, + { + "epoch": 20.51, + "learning_rate": 1.5897628904779827e-05, + "loss": 0.1055, + "step": 54500 + }, + { + "epoch": 20.55, + "learning_rate": 1.589010161836658e-05, + "loss": 0.1069, + "step": 54600 + }, + { + "epoch": 20.59, + "learning_rate": 1.5882574331953332e-05, + "loss": 0.1068, + "step": 54700 + }, + { + "epoch": 20.62, + "learning_rate": 1.5875047045540084e-05, + "loss": 0.1065, + "step": 54800 + }, + { + "epoch": 20.66, + "learning_rate": 1.5867519759126837e-05, + "loss": 0.1053, + "step": 54900 + }, + { + "epoch": 20.7, + "learning_rate": 1.585999247271359e-05, + "loss": 0.1055, + "step": 55000 + }, + { + "epoch": 20.74, + "learning_rate": 1.5852465186300342e-05, + "loss": 0.107, + "step": 55100 + }, + { + "epoch": 20.78, + "learning_rate": 1.584493789988709e-05, + "loss": 0.1058, + "step": 55200 + }, + { + "epoch": 20.81, + "learning_rate": 1.5837410613473843e-05, + "loss": 0.1088, + "step": 55300 + }, + { + "epoch": 20.85, + "learning_rate": 1.5829883327060596e-05, + "loss": 0.1061, + "step": 55400 + }, + { + "epoch": 20.89, + "learning_rate": 1.582235604064735e-05, + "loss": 0.1066, + "step": 55500 + }, + { + "epoch": 20.93, + "learning_rate": 1.58148287542341e-05, + "loss": 0.1071, + "step": 55600 + }, + { + "epoch": 20.96, + "learning_rate": 1.5807301467820853e-05, + "loss": 0.1066, + "step": 55700 + }, + { + "epoch": 21.0, + "eval_loss": 0.1060996800661087, + "eval_runtime": 45.0415, + "eval_samples_per_second": 166.513, + "eval_steps_per_second": 10.413, + "step": 55797 + }, + { + "epoch": 21.0, + "learning_rate": 1.5799774181407603e-05, + "loss": 0.1054, + "step": 55800 + }, + { + "epoch": 21.04, + "learning_rate": 1.5792246894994355e-05, + "loss": 0.1053, + "step": 55900 + }, + { + "epoch": 21.08, + "learning_rate": 1.5784719608581108e-05, + "loss": 0.1055, + "step": 56000 + }, + { + "epoch": 21.11, + "learning_rate": 1.577719232216786e-05, + "loss": 0.1056, + "step": 56100 + }, + { + "epoch": 21.15, + "learning_rate": 1.5769665035754613e-05, + "loss": 0.106, + "step": 56200 + }, + { + "epoch": 21.19, + "learning_rate": 1.5762137749341365e-05, + "loss": 0.106, + "step": 56300 + }, + { + "epoch": 21.23, + "learning_rate": 1.5754610462928114e-05, + "loss": 0.1056, + "step": 56400 + }, + { + "epoch": 21.26, + "learning_rate": 1.5747083176514867e-05, + "loss": 0.1069, + "step": 56500 + }, + { + "epoch": 21.3, + "learning_rate": 1.573955589010162e-05, + "loss": 0.1054, + "step": 56600 + }, + { + "epoch": 21.34, + "learning_rate": 1.5732028603688372e-05, + "loss": 0.1087, + "step": 56700 + }, + { + "epoch": 21.38, + "learning_rate": 1.5724501317275124e-05, + "loss": 0.1085, + "step": 56800 + }, + { + "epoch": 21.42, + "learning_rate": 1.5716974030861877e-05, + "loss": 0.1052, + "step": 56900 + }, + { + "epoch": 21.45, + "learning_rate": 1.5709446744448626e-05, + "loss": 0.1072, + "step": 57000 + }, + { + "epoch": 21.49, + "learning_rate": 1.570191945803538e-05, + "loss": 0.1055, + "step": 57100 + }, + { + "epoch": 21.53, + "learning_rate": 1.569439217162213e-05, + "loss": 0.1067, + "step": 57200 + }, + { + "epoch": 21.57, + "learning_rate": 1.5686864885208884e-05, + "loss": 0.1057, + "step": 57300 + }, + { + "epoch": 21.6, + "learning_rate": 1.5679337598795636e-05, + "loss": 0.1057, + "step": 57400 + }, + { + "epoch": 21.64, + "learning_rate": 1.567181031238239e-05, + "loss": 0.106, + "step": 57500 + }, + { + "epoch": 21.68, + "learning_rate": 1.566428302596914e-05, + "loss": 0.1022, + "step": 57600 + }, + { + "epoch": 21.72, + "learning_rate": 1.565675573955589e-05, + "loss": 0.1066, + "step": 57700 + }, + { + "epoch": 21.75, + "learning_rate": 1.5649228453142643e-05, + "loss": 0.1063, + "step": 57800 + }, + { + "epoch": 21.79, + "learning_rate": 1.5641701166729395e-05, + "loss": 0.1072, + "step": 57900 + }, + { + "epoch": 21.83, + "learning_rate": 1.5634173880316148e-05, + "loss": 0.1044, + "step": 58000 + }, + { + "epoch": 21.87, + "learning_rate": 1.56266465939029e-05, + "loss": 0.1071, + "step": 58100 + }, + { + "epoch": 21.9, + "learning_rate": 1.5619119307489653e-05, + "loss": 0.1058, + "step": 58200 + }, + { + "epoch": 21.94, + "learning_rate": 1.5611592021076402e-05, + "loss": 0.1072, + "step": 58300 + }, + { + "epoch": 21.98, + "learning_rate": 1.5604064734663154e-05, + "loss": 0.1065, + "step": 58400 + }, + { + "epoch": 22.0, + "eval_loss": 0.10562047362327576, + "eval_runtime": 44.6592, + "eval_samples_per_second": 167.938, + "eval_steps_per_second": 10.502, + "step": 58454 + }, + { + "epoch": 22.02, + "learning_rate": 1.5596537448249907e-05, + "loss": 0.1042, + "step": 58500 + }, + { + "epoch": 22.05, + "learning_rate": 1.558901016183666e-05, + "loss": 0.1051, + "step": 58600 + }, + { + "epoch": 22.09, + "learning_rate": 1.5581482875423412e-05, + "loss": 0.1075, + "step": 58700 + }, + { + "epoch": 22.13, + "learning_rate": 1.5573955589010164e-05, + "loss": 0.1052, + "step": 58800 + }, + { + "epoch": 22.17, + "learning_rate": 1.5566428302596914e-05, + "loss": 0.1039, + "step": 58900 + }, + { + "epoch": 22.21, + "learning_rate": 1.5558901016183666e-05, + "loss": 0.1063, + "step": 59000 + }, + { + "epoch": 22.24, + "learning_rate": 1.555137372977042e-05, + "loss": 0.1048, + "step": 59100 + }, + { + "epoch": 22.28, + "learning_rate": 1.554384644335717e-05, + "loss": 0.1056, + "step": 59200 + }, + { + "epoch": 22.32, + "learning_rate": 1.5536319156943924e-05, + "loss": 0.1064, + "step": 59300 + }, + { + "epoch": 22.36, + "learning_rate": 1.5528791870530676e-05, + "loss": 0.1055, + "step": 59400 + }, + { + "epoch": 22.39, + "learning_rate": 1.5521264584117425e-05, + "loss": 0.1065, + "step": 59500 + }, + { + "epoch": 22.43, + "learning_rate": 1.5513737297704178e-05, + "loss": 0.1063, + "step": 59600 + }, + { + "epoch": 22.47, + "learning_rate": 1.550621001129093e-05, + "loss": 0.1045, + "step": 59700 + }, + { + "epoch": 22.51, + "learning_rate": 1.5498682724877683e-05, + "loss": 0.1041, + "step": 59800 + }, + { + "epoch": 22.54, + "learning_rate": 1.5491155438464435e-05, + "loss": 0.1057, + "step": 59900 + }, + { + "epoch": 22.58, + "learning_rate": 1.5483628152051188e-05, + "loss": 0.1048, + "step": 60000 + }, + { + "epoch": 22.62, + "learning_rate": 1.547610086563794e-05, + "loss": 0.1055, + "step": 60100 + }, + { + "epoch": 22.66, + "learning_rate": 1.546857357922469e-05, + "loss": 0.1021, + "step": 60200 + }, + { + "epoch": 22.69, + "learning_rate": 1.5461046292811442e-05, + "loss": 0.1044, + "step": 60300 + }, + { + "epoch": 22.73, + "learning_rate": 1.5453519006398194e-05, + "loss": 0.1056, + "step": 60400 + }, + { + "epoch": 22.77, + "learning_rate": 1.5445991719984947e-05, + "loss": 0.1057, + "step": 60500 + }, + { + "epoch": 22.81, + "learning_rate": 1.54384644335717e-05, + "loss": 0.1058, + "step": 60600 + }, + { + "epoch": 22.85, + "learning_rate": 1.5430937147158452e-05, + "loss": 0.1059, + "step": 60700 + }, + { + "epoch": 22.88, + "learning_rate": 1.54234098607452e-05, + "loss": 0.1058, + "step": 60800 + }, + { + "epoch": 22.92, + "learning_rate": 1.5415882574331954e-05, + "loss": 0.1052, + "step": 60900 + }, + { + "epoch": 22.96, + "learning_rate": 1.5408355287918706e-05, + "loss": 0.1062, + "step": 61000 + }, + { + "epoch": 23.0, + "learning_rate": 1.540082800150546e-05, + "loss": 0.1045, + "step": 61100 + }, + { + "epoch": 23.0, + "eval_loss": 0.10369115322828293, + "eval_runtime": 44.6965, + "eval_samples_per_second": 167.798, + "eval_steps_per_second": 10.493, + "step": 61111 + }, + { + "epoch": 23.03, + "learning_rate": 1.539330071509221e-05, + "loss": 0.1056, + "step": 61200 + }, + { + "epoch": 23.07, + "learning_rate": 1.5385773428678964e-05, + "loss": 0.1034, + "step": 61300 + }, + { + "epoch": 23.11, + "learning_rate": 1.5378246142265713e-05, + "loss": 0.1056, + "step": 61400 + }, + { + "epoch": 23.15, + "learning_rate": 1.5370718855852465e-05, + "loss": 0.1055, + "step": 61500 + }, + { + "epoch": 23.18, + "learning_rate": 1.5363191569439218e-05, + "loss": 0.1053, + "step": 61600 + }, + { + "epoch": 23.22, + "learning_rate": 1.535566428302597e-05, + "loss": 0.1049, + "step": 61700 + }, + { + "epoch": 23.26, + "learning_rate": 1.5348136996612723e-05, + "loss": 0.1062, + "step": 61800 + }, + { + "epoch": 23.3, + "learning_rate": 1.5340609710199475e-05, + "loss": 0.1038, + "step": 61900 + }, + { + "epoch": 23.33, + "learning_rate": 1.5333082423786225e-05, + "loss": 0.1048, + "step": 62000 + }, + { + "epoch": 23.37, + "learning_rate": 1.5325555137372977e-05, + "loss": 0.1054, + "step": 62100 + }, + { + "epoch": 23.41, + "learning_rate": 1.5318027850959733e-05, + "loss": 0.1041, + "step": 62200 + }, + { + "epoch": 23.45, + "learning_rate": 1.5310500564546482e-05, + "loss": 0.1048, + "step": 62300 + }, + { + "epoch": 23.49, + "learning_rate": 1.5302973278133235e-05, + "loss": 0.105, + "step": 62400 + }, + { + "epoch": 23.52, + "learning_rate": 1.5295445991719987e-05, + "loss": 0.1023, + "step": 62500 + }, + { + "epoch": 23.56, + "learning_rate": 1.528791870530674e-05, + "loss": 0.105, + "step": 62600 + }, + { + "epoch": 23.6, + "learning_rate": 1.528039141889349e-05, + "loss": 0.1046, + "step": 62700 + }, + { + "epoch": 23.64, + "learning_rate": 1.527286413248024e-05, + "loss": 0.1033, + "step": 62800 + }, + { + "epoch": 23.67, + "learning_rate": 1.5265336846066994e-05, + "loss": 0.1057, + "step": 62900 + }, + { + "epoch": 23.71, + "learning_rate": 1.5257809559653746e-05, + "loss": 0.1056, + "step": 63000 + }, + { + "epoch": 23.75, + "learning_rate": 1.5250282273240497e-05, + "loss": 0.1056, + "step": 63100 + }, + { + "epoch": 23.79, + "learning_rate": 1.5242754986827251e-05, + "loss": 0.1032, + "step": 63200 + }, + { + "epoch": 23.82, + "learning_rate": 1.5235227700414002e-05, + "loss": 0.1025, + "step": 63300 + }, + { + "epoch": 23.86, + "learning_rate": 1.5227700414000755e-05, + "loss": 0.1063, + "step": 63400 + }, + { + "epoch": 23.9, + "learning_rate": 1.5220173127587505e-05, + "loss": 0.1035, + "step": 63500 + }, + { + "epoch": 23.94, + "learning_rate": 1.5212645841174258e-05, + "loss": 0.104, + "step": 63600 + }, + { + "epoch": 23.97, + "learning_rate": 1.5205118554761009e-05, + "loss": 0.1052, + "step": 63700 + }, + { + "epoch": 24.0, + "eval_loss": 0.10549741983413696, + "eval_runtime": 43.683, + "eval_samples_per_second": 171.691, + "eval_steps_per_second": 10.736, + "step": 63768 + }, + { + "epoch": 24.01, + "learning_rate": 1.5197591268347763e-05, + "loss": 0.1051, + "step": 63800 + }, + { + "epoch": 24.05, + "learning_rate": 1.5190063981934514e-05, + "loss": 0.1031, + "step": 63900 + }, + { + "epoch": 24.09, + "learning_rate": 1.5182536695521266e-05, + "loss": 0.1057, + "step": 64000 + }, + { + "epoch": 24.12, + "learning_rate": 1.5175009409108017e-05, + "loss": 0.1048, + "step": 64100 + }, + { + "epoch": 24.16, + "learning_rate": 1.516748212269477e-05, + "loss": 0.1054, + "step": 64200 + }, + { + "epoch": 24.2, + "learning_rate": 1.515995483628152e-05, + "loss": 0.1042, + "step": 64300 + }, + { + "epoch": 24.24, + "learning_rate": 1.5152427549868273e-05, + "loss": 0.1049, + "step": 64400 + }, + { + "epoch": 24.28, + "learning_rate": 1.5144900263455024e-05, + "loss": 0.1039, + "step": 64500 + }, + { + "epoch": 24.31, + "learning_rate": 1.5137372977041778e-05, + "loss": 0.1039, + "step": 64600 + }, + { + "epoch": 24.35, + "learning_rate": 1.512984569062853e-05, + "loss": 0.104, + "step": 64700 + }, + { + "epoch": 24.39, + "learning_rate": 1.5122318404215281e-05, + "loss": 0.1039, + "step": 64800 + }, + { + "epoch": 24.43, + "learning_rate": 1.5114791117802034e-05, + "loss": 0.1031, + "step": 64900 + }, + { + "epoch": 24.46, + "learning_rate": 1.5107263831388785e-05, + "loss": 0.1019, + "step": 65000 + }, + { + "epoch": 24.5, + "learning_rate": 1.5099736544975539e-05, + "loss": 0.1041, + "step": 65100 + }, + { + "epoch": 24.54, + "learning_rate": 1.509220925856229e-05, + "loss": 0.1049, + "step": 65200 + }, + { + "epoch": 24.58, + "learning_rate": 1.5084681972149042e-05, + "loss": 0.1029, + "step": 65300 + }, + { + "epoch": 24.61, + "learning_rate": 1.5077154685735793e-05, + "loss": 0.105, + "step": 65400 + }, + { + "epoch": 24.65, + "learning_rate": 1.5069627399322546e-05, + "loss": 0.1041, + "step": 65500 + }, + { + "epoch": 24.69, + "learning_rate": 1.5062100112909296e-05, + "loss": 0.1032, + "step": 65600 + }, + { + "epoch": 24.73, + "learning_rate": 1.505457282649605e-05, + "loss": 0.1033, + "step": 65700 + }, + { + "epoch": 24.76, + "learning_rate": 1.5047045540082801e-05, + "loss": 0.1036, + "step": 65800 + }, + { + "epoch": 24.8, + "learning_rate": 1.5039518253669554e-05, + "loss": 0.1031, + "step": 65900 + }, + { + "epoch": 24.84, + "learning_rate": 1.5031990967256305e-05, + "loss": 0.1031, + "step": 66000 + }, + { + "epoch": 24.88, + "learning_rate": 1.5024463680843057e-05, + "loss": 0.103, + "step": 66100 + }, + { + "epoch": 24.92, + "learning_rate": 1.5016936394429808e-05, + "loss": 0.103, + "step": 66200 + }, + { + "epoch": 24.95, + "learning_rate": 1.5009409108016562e-05, + "loss": 0.103, + "step": 66300 + }, + { + "epoch": 24.99, + "learning_rate": 1.5001881821603313e-05, + "loss": 0.102, + "step": 66400 + }, + { + "epoch": 25.0, + "eval_loss": 0.10278935730457306, + "eval_runtime": 43.9205, + "eval_samples_per_second": 170.763, + "eval_steps_per_second": 10.678, + "step": 66425 + }, + { + "epoch": 25.03, + "learning_rate": 1.4994354535190066e-05, + "loss": 0.1033, + "step": 66500 + }, + { + "epoch": 25.07, + "learning_rate": 1.4986827248776816e-05, + "loss": 0.1022, + "step": 66600 + }, + { + "epoch": 25.1, + "learning_rate": 1.4979299962363569e-05, + "loss": 0.1028, + "step": 66700 + }, + { + "epoch": 25.14, + "learning_rate": 1.497177267595032e-05, + "loss": 0.1044, + "step": 66800 + }, + { + "epoch": 25.18, + "learning_rate": 1.4964245389537074e-05, + "loss": 0.1038, + "step": 66900 + }, + { + "epoch": 25.22, + "learning_rate": 1.4956718103123825e-05, + "loss": 0.1052, + "step": 67000 + }, + { + "epoch": 25.25, + "learning_rate": 1.4949190816710577e-05, + "loss": 0.1039, + "step": 67100 + }, + { + "epoch": 25.29, + "learning_rate": 1.494166353029733e-05, + "loss": 0.1039, + "step": 67200 + }, + { + "epoch": 25.33, + "learning_rate": 1.493413624388408e-05, + "loss": 0.1025, + "step": 67300 + }, + { + "epoch": 25.37, + "learning_rate": 1.4926608957470833e-05, + "loss": 0.1037, + "step": 67400 + }, + { + "epoch": 25.4, + "learning_rate": 1.4919081671057584e-05, + "loss": 0.1019, + "step": 67500 + }, + { + "epoch": 25.44, + "learning_rate": 1.4911554384644338e-05, + "loss": 0.1027, + "step": 67600 + }, + { + "epoch": 25.48, + "learning_rate": 1.4904027098231089e-05, + "loss": 0.1037, + "step": 67700 + }, + { + "epoch": 25.52, + "learning_rate": 1.4896499811817842e-05, + "loss": 0.1031, + "step": 67800 + }, + { + "epoch": 25.56, + "learning_rate": 1.4888972525404592e-05, + "loss": 0.1035, + "step": 67900 + }, + { + "epoch": 25.59, + "learning_rate": 1.4881445238991345e-05, + "loss": 0.1031, + "step": 68000 + }, + { + "epoch": 25.63, + "learning_rate": 1.4873917952578096e-05, + "loss": 0.1034, + "step": 68100 + }, + { + "epoch": 25.67, + "learning_rate": 1.486639066616485e-05, + "loss": 0.1037, + "step": 68200 + }, + { + "epoch": 25.71, + "learning_rate": 1.48588633797516e-05, + "loss": 0.104, + "step": 68300 + }, + { + "epoch": 25.74, + "learning_rate": 1.4851336093338353e-05, + "loss": 0.1036, + "step": 68400 + }, + { + "epoch": 25.78, + "learning_rate": 1.4843808806925104e-05, + "loss": 0.1031, + "step": 68500 + }, + { + "epoch": 25.82, + "learning_rate": 1.4836281520511857e-05, + "loss": 0.1027, + "step": 68600 + }, + { + "epoch": 25.86, + "learning_rate": 1.4828754234098607e-05, + "loss": 0.1036, + "step": 68700 + }, + { + "epoch": 25.89, + "learning_rate": 1.4821226947685362e-05, + "loss": 0.1023, + "step": 68800 + }, + { + "epoch": 25.93, + "learning_rate": 1.4813699661272112e-05, + "loss": 0.1015, + "step": 68900 + }, + { + "epoch": 25.97, + "learning_rate": 1.4806172374858865e-05, + "loss": 0.1025, + "step": 69000 + }, + { + "epoch": 26.0, + "eval_loss": 0.10342206060886383, + "eval_runtime": 45.4198, + "eval_samples_per_second": 165.126, + "eval_steps_per_second": 10.326, + "step": 69082 + }, + { + "epoch": 26.01, + "learning_rate": 1.4798645088445616e-05, + "loss": 0.1011, + "step": 69100 + }, + { + "epoch": 26.04, + "learning_rate": 1.4791117802032368e-05, + "loss": 0.1021, + "step": 69200 + }, + { + "epoch": 26.08, + "learning_rate": 1.4783590515619119e-05, + "loss": 0.1036, + "step": 69300 + }, + { + "epoch": 26.12, + "learning_rate": 1.4776063229205873e-05, + "loss": 0.1041, + "step": 69400 + }, + { + "epoch": 26.16, + "learning_rate": 1.4768535942792624e-05, + "loss": 0.1028, + "step": 69500 + }, + { + "epoch": 26.19, + "learning_rate": 1.4761008656379377e-05, + "loss": 0.1024, + "step": 69600 + }, + { + "epoch": 26.23, + "learning_rate": 1.4753481369966129e-05, + "loss": 0.1019, + "step": 69700 + }, + { + "epoch": 26.27, + "learning_rate": 1.474595408355288e-05, + "loss": 0.1022, + "step": 69800 + }, + { + "epoch": 26.31, + "learning_rate": 1.4738426797139634e-05, + "loss": 0.102, + "step": 69900 + }, + { + "epoch": 26.35, + "learning_rate": 1.4730899510726385e-05, + "loss": 0.1008, + "step": 70000 + }, + { + "epoch": 26.38, + "learning_rate": 1.4723372224313137e-05, + "loss": 0.1032, + "step": 70100 + }, + { + "epoch": 26.42, + "learning_rate": 1.4715844937899888e-05, + "loss": 0.1036, + "step": 70200 + }, + { + "epoch": 26.46, + "learning_rate": 1.470831765148664e-05, + "loss": 0.103, + "step": 70300 + }, + { + "epoch": 26.5, + "learning_rate": 1.4700790365073392e-05, + "loss": 0.1016, + "step": 70400 + }, + { + "epoch": 26.53, + "learning_rate": 1.4693263078660144e-05, + "loss": 0.1022, + "step": 70500 + }, + { + "epoch": 26.57, + "learning_rate": 1.4685735792246895e-05, + "loss": 0.1033, + "step": 70600 + }, + { + "epoch": 26.61, + "learning_rate": 1.4678208505833649e-05, + "loss": 0.1032, + "step": 70700 + }, + { + "epoch": 26.65, + "learning_rate": 1.46706812194204e-05, + "loss": 0.1026, + "step": 70800 + }, + { + "epoch": 26.68, + "learning_rate": 1.4663153933007152e-05, + "loss": 0.1019, + "step": 70900 + }, + { + "epoch": 26.72, + "learning_rate": 1.4655626646593903e-05, + "loss": 0.1035, + "step": 71000 + }, + { + "epoch": 26.76, + "learning_rate": 1.4648099360180656e-05, + "loss": 0.102, + "step": 71100 + }, + { + "epoch": 26.8, + "learning_rate": 1.4640572073767407e-05, + "loss": 0.1026, + "step": 71200 + }, + { + "epoch": 26.83, + "learning_rate": 1.463304478735416e-05, + "loss": 0.1023, + "step": 71300 + }, + { + "epoch": 26.87, + "learning_rate": 1.4625517500940912e-05, + "loss": 0.1011, + "step": 71400 + }, + { + "epoch": 26.91, + "learning_rate": 1.4617990214527664e-05, + "loss": 0.1037, + "step": 71500 + }, + { + "epoch": 26.95, + "learning_rate": 1.4610462928114415e-05, + "loss": 0.1036, + "step": 71600 + }, + { + "epoch": 26.99, + "learning_rate": 1.4602935641701168e-05, + "loss": 0.1037, + "step": 71700 + }, + { + "epoch": 27.0, + "eval_loss": 0.10246068239212036, + "eval_runtime": 45.3187, + "eval_samples_per_second": 165.495, + "eval_steps_per_second": 10.349, + "step": 71739 + }, + { + "epoch": 27.02, + "learning_rate": 1.4595408355287918e-05, + "loss": 0.1032, + "step": 71800 + }, + { + "epoch": 27.06, + "learning_rate": 1.4587881068874673e-05, + "loss": 0.1021, + "step": 71900 + }, + { + "epoch": 27.1, + "learning_rate": 1.4580353782461423e-05, + "loss": 0.1034, + "step": 72000 + }, + { + "epoch": 27.14, + "learning_rate": 1.4572826496048176e-05, + "loss": 0.1029, + "step": 72100 + }, + { + "epoch": 27.17, + "learning_rate": 1.4565299209634928e-05, + "loss": 0.1024, + "step": 72200 + }, + { + "epoch": 27.21, + "learning_rate": 1.455777192322168e-05, + "loss": 0.1028, + "step": 72300 + }, + { + "epoch": 27.25, + "learning_rate": 1.4550244636808433e-05, + "loss": 0.1053, + "step": 72400 + }, + { + "epoch": 27.29, + "learning_rate": 1.4542717350395184e-05, + "loss": 0.1013, + "step": 72500 + }, + { + "epoch": 27.32, + "learning_rate": 1.4535190063981937e-05, + "loss": 0.1012, + "step": 72600 + }, + { + "epoch": 27.36, + "learning_rate": 1.4527662777568688e-05, + "loss": 0.1024, + "step": 72700 + }, + { + "epoch": 27.4, + "learning_rate": 1.452013549115544e-05, + "loss": 0.1003, + "step": 72800 + }, + { + "epoch": 27.44, + "learning_rate": 1.4512608204742191e-05, + "loss": 0.103, + "step": 72900 + }, + { + "epoch": 27.47, + "learning_rate": 1.4505080918328945e-05, + "loss": 0.1012, + "step": 73000 + }, + { + "epoch": 27.51, + "learning_rate": 1.4497553631915696e-05, + "loss": 0.1015, + "step": 73100 + }, + { + "epoch": 27.55, + "learning_rate": 1.4490026345502448e-05, + "loss": 0.1032, + "step": 73200 + }, + { + "epoch": 27.59, + "learning_rate": 1.44824990590892e-05, + "loss": 0.1038, + "step": 73300 + }, + { + "epoch": 27.63, + "learning_rate": 1.4474971772675952e-05, + "loss": 0.0996, + "step": 73400 + }, + { + "epoch": 27.66, + "learning_rate": 1.4467444486262703e-05, + "loss": 0.1024, + "step": 73500 + }, + { + "epoch": 27.7, + "learning_rate": 1.4459917199849455e-05, + "loss": 0.1019, + "step": 73600 + }, + { + "epoch": 27.74, + "learning_rate": 1.4452389913436206e-05, + "loss": 0.1024, + "step": 73700 + }, + { + "epoch": 27.78, + "learning_rate": 1.444486262702296e-05, + "loss": 0.1006, + "step": 73800 + }, + { + "epoch": 27.81, + "learning_rate": 1.4437335340609711e-05, + "loss": 0.1014, + "step": 73900 + }, + { + "epoch": 27.85, + "learning_rate": 1.4429808054196463e-05, + "loss": 0.1024, + "step": 74000 + }, + { + "epoch": 27.89, + "learning_rate": 1.4422280767783214e-05, + "loss": 0.1023, + "step": 74100 + }, + { + "epoch": 27.93, + "learning_rate": 1.4414753481369967e-05, + "loss": 0.1034, + "step": 74200 + }, + { + "epoch": 27.96, + "learning_rate": 1.4407226194956718e-05, + "loss": 0.1022, + "step": 74300 + }, + { + "epoch": 28.0, + "eval_loss": 0.10144173353910446, + "eval_runtime": 45.583, + "eval_samples_per_second": 164.535, + "eval_steps_per_second": 10.289, + "step": 74396 + }, + { + "epoch": 28.0, + "learning_rate": 1.4399698908543472e-05, + "loss": 0.1023, + "step": 74400 + }, + { + "epoch": 28.04, + "learning_rate": 1.4392171622130224e-05, + "loss": 0.1017, + "step": 74500 + }, + { + "epoch": 28.08, + "learning_rate": 1.4384644335716975e-05, + "loss": 0.1017, + "step": 74600 + }, + { + "epoch": 28.11, + "learning_rate": 1.4377117049303728e-05, + "loss": 0.1008, + "step": 74700 + }, + { + "epoch": 28.15, + "learning_rate": 1.4369589762890478e-05, + "loss": 0.1023, + "step": 74800 + }, + { + "epoch": 28.19, + "learning_rate": 1.4362062476477233e-05, + "loss": 0.1022, + "step": 74900 + }, + { + "epoch": 28.23, + "learning_rate": 1.4354535190063984e-05, + "loss": 0.1017, + "step": 75000 + }, + { + "epoch": 28.26, + "learning_rate": 1.4347007903650736e-05, + "loss": 0.1012, + "step": 75100 + }, + { + "epoch": 28.3, + "learning_rate": 1.4339480617237487e-05, + "loss": 0.103, + "step": 75200 + }, + { + "epoch": 28.34, + "learning_rate": 1.433195333082424e-05, + "loss": 0.1025, + "step": 75300 + }, + { + "epoch": 28.38, + "learning_rate": 1.432442604441099e-05, + "loss": 0.1013, + "step": 75400 + }, + { + "epoch": 28.42, + "learning_rate": 1.4316898757997744e-05, + "loss": 0.1021, + "step": 75500 + }, + { + "epoch": 28.45, + "learning_rate": 1.4309371471584495e-05, + "loss": 0.1026, + "step": 75600 + }, + { + "epoch": 28.49, + "learning_rate": 1.4301844185171248e-05, + "loss": 0.1027, + "step": 75700 + }, + { + "epoch": 28.53, + "learning_rate": 1.4294316898757999e-05, + "loss": 0.1014, + "step": 75800 + }, + { + "epoch": 28.57, + "learning_rate": 1.4286789612344751e-05, + "loss": 0.1013, + "step": 75900 + }, + { + "epoch": 28.6, + "learning_rate": 1.4279262325931502e-05, + "loss": 0.1018, + "step": 76000 + }, + { + "epoch": 28.64, + "learning_rate": 1.4271735039518254e-05, + "loss": 0.102, + "step": 76100 + }, + { + "epoch": 28.68, + "learning_rate": 1.4264207753105005e-05, + "loss": 0.1013, + "step": 76200 + }, + { + "epoch": 28.72, + "learning_rate": 1.425668046669176e-05, + "loss": 0.1027, + "step": 76300 + }, + { + "epoch": 28.75, + "learning_rate": 1.424915318027851e-05, + "loss": 0.1004, + "step": 76400 + }, + { + "epoch": 28.79, + "learning_rate": 1.4241625893865263e-05, + "loss": 0.1013, + "step": 76500 + }, + { + "epoch": 28.83, + "learning_rate": 1.4234098607452014e-05, + "loss": 0.1021, + "step": 76600 + }, + { + "epoch": 28.87, + "learning_rate": 1.4226571321038766e-05, + "loss": 0.1009, + "step": 76700 + }, + { + "epoch": 28.9, + "learning_rate": 1.4219044034625517e-05, + "loss": 0.1012, + "step": 76800 + }, + { + "epoch": 28.94, + "learning_rate": 1.4211516748212271e-05, + "loss": 0.1015, + "step": 76900 + }, + { + "epoch": 28.98, + "learning_rate": 1.4203989461799024e-05, + "loss": 0.1026, + "step": 77000 + }, + { + "epoch": 29.0, + "eval_loss": 0.10109123587608337, + "eval_runtime": 45.9148, + "eval_samples_per_second": 163.346, + "eval_steps_per_second": 10.215, + "step": 77053 + }, + { + "epoch": 29.02, + "learning_rate": 1.4196462175385774e-05, + "loss": 0.102, + "step": 77100 + }, + { + "epoch": 29.06, + "learning_rate": 1.4188934888972527e-05, + "loss": 0.1028, + "step": 77200 + }, + { + "epoch": 29.09, + "learning_rate": 1.4181407602559278e-05, + "loss": 0.1016, + "step": 77300 + }, + { + "epoch": 29.13, + "learning_rate": 1.4173880316146032e-05, + "loss": 0.1021, + "step": 77400 + }, + { + "epoch": 29.17, + "learning_rate": 1.4166353029732783e-05, + "loss": 0.1018, + "step": 77500 + }, + { + "epoch": 29.21, + "learning_rate": 1.4158825743319535e-05, + "loss": 0.1004, + "step": 77600 + }, + { + "epoch": 29.24, + "learning_rate": 1.4151298456906286e-05, + "loss": 0.102, + "step": 77700 + }, + { + "epoch": 29.28, + "learning_rate": 1.4143771170493039e-05, + "loss": 0.1013, + "step": 77800 + }, + { + "epoch": 29.32, + "learning_rate": 1.413624388407979e-05, + "loss": 0.1014, + "step": 77900 + }, + { + "epoch": 29.36, + "learning_rate": 1.4128716597666544e-05, + "loss": 0.1003, + "step": 78000 + }, + { + "epoch": 29.39, + "learning_rate": 1.4121189311253294e-05, + "loss": 0.1009, + "step": 78100 + }, + { + "epoch": 29.43, + "learning_rate": 1.4113662024840047e-05, + "loss": 0.1008, + "step": 78200 + }, + { + "epoch": 29.47, + "learning_rate": 1.4106134738426798e-05, + "loss": 0.1015, + "step": 78300 + }, + { + "epoch": 29.51, + "learning_rate": 1.409860745201355e-05, + "loss": 0.1019, + "step": 78400 + }, + { + "epoch": 29.54, + "learning_rate": 1.4091080165600301e-05, + "loss": 0.1014, + "step": 78500 + }, + { + "epoch": 29.58, + "learning_rate": 1.4083552879187055e-05, + "loss": 0.1009, + "step": 78600 + }, + { + "epoch": 29.62, + "learning_rate": 1.4076025592773806e-05, + "loss": 0.1013, + "step": 78700 + }, + { + "epoch": 29.66, + "learning_rate": 1.4068498306360559e-05, + "loss": 0.1018, + "step": 78800 + }, + { + "epoch": 29.7, + "learning_rate": 1.406097101994731e-05, + "loss": 0.1026, + "step": 78900 + }, + { + "epoch": 29.73, + "learning_rate": 1.4053443733534062e-05, + "loss": 0.1005, + "step": 79000 + }, + { + "epoch": 29.77, + "learning_rate": 1.4045916447120813e-05, + "loss": 0.1009, + "step": 79100 + }, + { + "epoch": 29.81, + "learning_rate": 1.4038389160707565e-05, + "loss": 0.1001, + "step": 79200 + }, + { + "epoch": 29.85, + "learning_rate": 1.4030861874294316e-05, + "loss": 0.102, + "step": 79300 + }, + { + "epoch": 29.88, + "learning_rate": 1.402333458788107e-05, + "loss": 0.1011, + "step": 79400 + }, + { + "epoch": 29.92, + "learning_rate": 1.4015807301467823e-05, + "loss": 0.1011, + "step": 79500 + }, + { + "epoch": 29.96, + "learning_rate": 1.4008280015054574e-05, + "loss": 0.1011, + "step": 79600 + }, + { + "epoch": 30.0, + "learning_rate": 1.4000752728641326e-05, + "loss": 0.1022, + "step": 79700 + }, + { + "epoch": 30.0, + "eval_loss": 0.10009202361106873, + "eval_runtime": 45.5608, + "eval_samples_per_second": 164.615, + "eval_steps_per_second": 10.294, + "step": 79710 + }, + { + "epoch": 30.03, + "learning_rate": 1.3993225442228077e-05, + "loss": 0.1027, + "step": 79800 + }, + { + "epoch": 30.07, + "learning_rate": 1.3985698155814831e-05, + "loss": 0.1015, + "step": 79900 + }, + { + "epoch": 30.11, + "learning_rate": 1.3978170869401582e-05, + "loss": 0.1018, + "step": 80000 + }, + { + "epoch": 30.15, + "learning_rate": 1.3970643582988335e-05, + "loss": 0.1014, + "step": 80100 + }, + { + "epoch": 30.18, + "learning_rate": 1.3963116296575085e-05, + "loss": 0.1013, + "step": 80200 + }, + { + "epoch": 30.22, + "learning_rate": 1.3955589010161838e-05, + "loss": 0.0992, + "step": 80300 + }, + { + "epoch": 30.26, + "learning_rate": 1.3948061723748589e-05, + "loss": 0.1029, + "step": 80400 + }, + { + "epoch": 30.3, + "learning_rate": 1.3940534437335343e-05, + "loss": 0.1009, + "step": 80500 + }, + { + "epoch": 30.33, + "learning_rate": 1.3933007150922094e-05, + "loss": 0.1011, + "step": 80600 + }, + { + "epoch": 30.37, + "learning_rate": 1.3925479864508846e-05, + "loss": 0.1003, + "step": 80700 + }, + { + "epoch": 30.41, + "learning_rate": 1.3917952578095597e-05, + "loss": 0.1011, + "step": 80800 + }, + { + "epoch": 30.45, + "learning_rate": 1.391042529168235e-05, + "loss": 0.1003, + "step": 80900 + }, + { + "epoch": 30.49, + "learning_rate": 1.39028980052691e-05, + "loss": 0.1011, + "step": 81000 + }, + { + "epoch": 30.52, + "learning_rate": 1.3895370718855855e-05, + "loss": 0.1004, + "step": 81100 + }, + { + "epoch": 30.56, + "learning_rate": 1.3887843432442605e-05, + "loss": 0.1006, + "step": 81200 + }, + { + "epoch": 30.6, + "learning_rate": 1.3880316146029358e-05, + "loss": 0.1019, + "step": 81300 + }, + { + "epoch": 30.64, + "learning_rate": 1.3872788859616109e-05, + "loss": 0.1013, + "step": 81400 + }, + { + "epoch": 30.67, + "learning_rate": 1.3865261573202861e-05, + "loss": 0.0997, + "step": 81500 + }, + { + "epoch": 30.71, + "learning_rate": 1.3857734286789612e-05, + "loss": 0.1, + "step": 81600 + }, + { + "epoch": 30.75, + "learning_rate": 1.3850207000376366e-05, + "loss": 0.1004, + "step": 81700 + }, + { + "epoch": 30.79, + "learning_rate": 1.3842679713963117e-05, + "loss": 0.101, + "step": 81800 + }, + { + "epoch": 30.82, + "learning_rate": 1.383515242754987e-05, + "loss": 0.1003, + "step": 81900 + }, + { + "epoch": 30.86, + "learning_rate": 1.3827625141136622e-05, + "loss": 0.1, + "step": 82000 + }, + { + "epoch": 30.9, + "learning_rate": 1.3820097854723373e-05, + "loss": 0.1003, + "step": 82100 + }, + { + "epoch": 30.94, + "learning_rate": 1.3812570568310126e-05, + "loss": 0.0997, + "step": 82200 + }, + { + "epoch": 30.97, + "learning_rate": 1.3805043281896876e-05, + "loss": 0.0997, + "step": 82300 + }, + { + "epoch": 31.0, + "eval_loss": 0.10071013867855072, + "eval_runtime": 45.6947, + "eval_samples_per_second": 164.133, + "eval_steps_per_second": 10.264, + "step": 82367 + }, + { + "epoch": 31.01, + "learning_rate": 1.379751599548363e-05, + "loss": 0.0994, + "step": 82400 + }, + { + "epoch": 31.05, + "learning_rate": 1.3789988709070381e-05, + "loss": 0.1028, + "step": 82500 + }, + { + "epoch": 31.09, + "learning_rate": 1.3782461422657134e-05, + "loss": 0.1002, + "step": 82600 + }, + { + "epoch": 31.13, + "learning_rate": 1.3774934136243885e-05, + "loss": 0.0998, + "step": 82700 + }, + { + "epoch": 31.16, + "learning_rate": 1.3767406849830637e-05, + "loss": 0.1013, + "step": 82800 + }, + { + "epoch": 31.2, + "learning_rate": 1.3759879563417388e-05, + "loss": 0.1021, + "step": 82900 + }, + { + "epoch": 31.24, + "learning_rate": 1.3752352277004142e-05, + "loss": 0.1008, + "step": 83000 + }, + { + "epoch": 31.28, + "learning_rate": 1.3744824990590893e-05, + "loss": 0.101, + "step": 83100 + }, + { + "epoch": 31.31, + "learning_rate": 1.3737297704177646e-05, + "loss": 0.1016, + "step": 83200 + }, + { + "epoch": 31.35, + "learning_rate": 1.3729770417764396e-05, + "loss": 0.0993, + "step": 83300 + }, + { + "epoch": 31.39, + "learning_rate": 1.3722243131351149e-05, + "loss": 0.0993, + "step": 83400 + }, + { + "epoch": 31.43, + "learning_rate": 1.37147158449379e-05, + "loss": 0.0998, + "step": 83500 + }, + { + "epoch": 31.46, + "learning_rate": 1.3707188558524654e-05, + "loss": 0.1008, + "step": 83600 + }, + { + "epoch": 31.5, + "learning_rate": 1.3699661272111405e-05, + "loss": 0.0989, + "step": 83700 + }, + { + "epoch": 31.54, + "learning_rate": 1.3692133985698157e-05, + "loss": 0.1027, + "step": 83800 + }, + { + "epoch": 31.58, + "learning_rate": 1.3684606699284908e-05, + "loss": 0.1001, + "step": 83900 + }, + { + "epoch": 31.61, + "learning_rate": 1.367707941287166e-05, + "loss": 0.1006, + "step": 84000 + }, + { + "epoch": 31.65, + "learning_rate": 1.3669552126458411e-05, + "loss": 0.0991, + "step": 84100 + }, + { + "epoch": 31.69, + "learning_rate": 1.3662024840045166e-05, + "loss": 0.1005, + "step": 84200 + }, + { + "epoch": 31.73, + "learning_rate": 1.3654497553631916e-05, + "loss": 0.099, + "step": 84300 + }, + { + "epoch": 31.77, + "learning_rate": 1.3646970267218669e-05, + "loss": 0.1002, + "step": 84400 + }, + { + "epoch": 31.8, + "learning_rate": 1.3639442980805421e-05, + "loss": 0.1001, + "step": 84500 + }, + { + "epoch": 31.84, + "learning_rate": 1.3631915694392172e-05, + "loss": 0.0988, + "step": 84600 + }, + { + "epoch": 31.88, + "learning_rate": 1.3624388407978926e-05, + "loss": 0.0998, + "step": 84700 + }, + { + "epoch": 31.92, + "learning_rate": 1.3616861121565677e-05, + "loss": 0.0996, + "step": 84800 + }, + { + "epoch": 31.95, + "learning_rate": 1.360933383515243e-05, + "loss": 0.1022, + "step": 84900 + }, + { + "epoch": 31.99, + "learning_rate": 1.360180654873918e-05, + "loss": 0.0998, + "step": 85000 + }, + { + "epoch": 32.0, + "eval_loss": 0.10160314291715622, + "eval_runtime": 45.6356, + "eval_samples_per_second": 164.345, + "eval_steps_per_second": 10.277, + "step": 85024 + }, + { + "epoch": 32.03, + "learning_rate": 1.3594279262325933e-05, + "loss": 0.1026, + "step": 85100 + }, + { + "epoch": 32.07, + "learning_rate": 1.3586751975912684e-05, + "loss": 0.0999, + "step": 85200 + }, + { + "epoch": 32.1, + "learning_rate": 1.3579224689499436e-05, + "loss": 0.101, + "step": 85300 + }, + { + "epoch": 32.14, + "learning_rate": 1.3571697403086187e-05, + "loss": 0.1007, + "step": 85400 + }, + { + "epoch": 32.18, + "learning_rate": 1.3564170116672942e-05, + "loss": 0.1001, + "step": 85500 + }, + { + "epoch": 32.22, + "learning_rate": 1.3556642830259692e-05, + "loss": 0.1014, + "step": 85600 + }, + { + "epoch": 32.25, + "learning_rate": 1.3549115543846445e-05, + "loss": 0.0993, + "step": 85700 + }, + { + "epoch": 32.29, + "learning_rate": 1.3541588257433196e-05, + "loss": 0.1004, + "step": 85800 + }, + { + "epoch": 32.33, + "learning_rate": 1.3534060971019948e-05, + "loss": 0.1015, + "step": 85900 + }, + { + "epoch": 32.37, + "learning_rate": 1.3526533684606699e-05, + "loss": 0.0991, + "step": 86000 + }, + { + "epoch": 32.4, + "learning_rate": 1.3519006398193453e-05, + "loss": 0.0984, + "step": 86100 + }, + { + "epoch": 32.44, + "learning_rate": 1.3511479111780204e-05, + "loss": 0.1011, + "step": 86200 + }, + { + "epoch": 32.48, + "learning_rate": 1.3503951825366957e-05, + "loss": 0.1, + "step": 86300 + }, + { + "epoch": 32.52, + "learning_rate": 1.3496424538953707e-05, + "loss": 0.1009, + "step": 86400 + }, + { + "epoch": 32.56, + "learning_rate": 1.348889725254046e-05, + "loss": 0.0996, + "step": 86500 + }, + { + "epoch": 32.59, + "learning_rate": 1.348136996612721e-05, + "loss": 0.1003, + "step": 86600 + }, + { + "epoch": 32.63, + "learning_rate": 1.3473842679713965e-05, + "loss": 0.0997, + "step": 86700 + }, + { + "epoch": 32.67, + "learning_rate": 1.3466315393300716e-05, + "loss": 0.1016, + "step": 86800 + }, + { + "epoch": 32.71, + "learning_rate": 1.3458788106887468e-05, + "loss": 0.1003, + "step": 86900 + }, + { + "epoch": 32.74, + "learning_rate": 1.345126082047422e-05, + "loss": 0.0984, + "step": 87000 + }, + { + "epoch": 32.78, + "learning_rate": 1.3443733534060972e-05, + "loss": 0.1008, + "step": 87100 + }, + { + "epoch": 32.82, + "learning_rate": 1.3436206247647726e-05, + "loss": 0.0999, + "step": 87200 + }, + { + "epoch": 32.86, + "learning_rate": 1.3428678961234477e-05, + "loss": 0.0996, + "step": 87300 + }, + { + "epoch": 32.89, + "learning_rate": 1.3421151674821229e-05, + "loss": 0.101, + "step": 87400 + }, + { + "epoch": 32.93, + "learning_rate": 1.341362438840798e-05, + "loss": 0.099, + "step": 87500 + }, + { + "epoch": 32.97, + "learning_rate": 1.3406097101994732e-05, + "loss": 0.1019, + "step": 87600 + }, + { + "epoch": 33.0, + "eval_loss": 0.10076244920492172, + "eval_runtime": 45.2424, + "eval_samples_per_second": 165.774, + "eval_steps_per_second": 10.366, + "step": 87681 + }, + { + "epoch": 33.01, + "learning_rate": 1.3398569815581483e-05, + "loss": 0.1001, + "step": 87700 + }, + { + "epoch": 33.04, + "learning_rate": 1.3391042529168237e-05, + "loss": 0.1017, + "step": 87800 + }, + { + "epoch": 33.08, + "learning_rate": 1.3383515242754988e-05, + "loss": 0.1, + "step": 87900 + }, + { + "epoch": 33.12, + "learning_rate": 1.337598795634174e-05, + "loss": 0.0983, + "step": 88000 + }, + { + "epoch": 33.16, + "learning_rate": 1.3368460669928492e-05, + "loss": 0.1, + "step": 88100 + }, + { + "epoch": 33.2, + "learning_rate": 1.3360933383515244e-05, + "loss": 0.1008, + "step": 88200 + }, + { + "epoch": 33.23, + "learning_rate": 1.3353406097101995e-05, + "loss": 0.0998, + "step": 88300 + }, + { + "epoch": 33.27, + "learning_rate": 1.3345878810688747e-05, + "loss": 0.1, + "step": 88400 + }, + { + "epoch": 33.31, + "learning_rate": 1.3338351524275498e-05, + "loss": 0.0997, + "step": 88500 + }, + { + "epoch": 33.35, + "learning_rate": 1.3330824237862252e-05, + "loss": 0.0995, + "step": 88600 + }, + { + "epoch": 33.38, + "learning_rate": 1.3323296951449003e-05, + "loss": 0.1015, + "step": 88700 + }, + { + "epoch": 33.42, + "learning_rate": 1.3315769665035756e-05, + "loss": 0.0993, + "step": 88800 + }, + { + "epoch": 33.46, + "learning_rate": 1.3308242378622507e-05, + "loss": 0.0986, + "step": 88900 + }, + { + "epoch": 33.5, + "learning_rate": 1.330071509220926e-05, + "loss": 0.1003, + "step": 89000 + }, + { + "epoch": 33.53, + "learning_rate": 1.329318780579601e-05, + "loss": 0.0997, + "step": 89100 + }, + { + "epoch": 33.57, + "learning_rate": 1.3285660519382764e-05, + "loss": 0.0993, + "step": 89200 + }, + { + "epoch": 33.61, + "learning_rate": 1.3278133232969515e-05, + "loss": 0.1017, + "step": 89300 + }, + { + "epoch": 33.65, + "learning_rate": 1.3270605946556268e-05, + "loss": 0.1003, + "step": 89400 + }, + { + "epoch": 33.68, + "learning_rate": 1.326307866014302e-05, + "loss": 0.1012, + "step": 89500 + }, + { + "epoch": 33.72, + "learning_rate": 1.3255551373729771e-05, + "loss": 0.1006, + "step": 89600 + }, + { + "epoch": 33.76, + "learning_rate": 1.3248024087316525e-05, + "loss": 0.0976, + "step": 89700 + }, + { + "epoch": 33.8, + "learning_rate": 1.3240496800903276e-05, + "loss": 0.1002, + "step": 89800 + }, + { + "epoch": 33.84, + "learning_rate": 1.3232969514490028e-05, + "loss": 0.0984, + "step": 89900 + }, + { + "epoch": 33.87, + "learning_rate": 1.322544222807678e-05, + "loss": 0.0987, + "step": 90000 + }, + { + "epoch": 33.91, + "learning_rate": 1.3217914941663532e-05, + "loss": 0.1008, + "step": 90100 + }, + { + "epoch": 33.95, + "learning_rate": 1.3210387655250283e-05, + "loss": 0.1031, + "step": 90200 + }, + { + "epoch": 33.99, + "learning_rate": 1.3202860368837037e-05, + "loss": 0.0999, + "step": 90300 + }, + { + "epoch": 34.0, + "eval_loss": 0.10001099109649658, + "eval_runtime": 45.4099, + "eval_samples_per_second": 165.162, + "eval_steps_per_second": 10.328, + "step": 90338 + }, + { + "epoch": 34.02, + "learning_rate": 1.3195333082423788e-05, + "loss": 0.0995, + "step": 90400 + }, + { + "epoch": 34.06, + "learning_rate": 1.318780579601054e-05, + "loss": 0.1017, + "step": 90500 + }, + { + "epoch": 34.1, + "learning_rate": 1.3180278509597291e-05, + "loss": 0.101, + "step": 90600 + }, + { + "epoch": 34.14, + "learning_rate": 1.3172751223184043e-05, + "loss": 0.1002, + "step": 90700 + }, + { + "epoch": 34.17, + "learning_rate": 1.3165223936770794e-05, + "loss": 0.0988, + "step": 90800 + }, + { + "epoch": 34.21, + "learning_rate": 1.3157696650357548e-05, + "loss": 0.101, + "step": 90900 + }, + { + "epoch": 34.25, + "learning_rate": 1.31501693639443e-05, + "loss": 0.0999, + "step": 91000 + }, + { + "epoch": 34.29, + "learning_rate": 1.3142642077531052e-05, + "loss": 0.0994, + "step": 91100 + }, + { + "epoch": 34.32, + "learning_rate": 1.3135114791117803e-05, + "loss": 0.1002, + "step": 91200 + }, + { + "epoch": 34.36, + "learning_rate": 1.3127587504704555e-05, + "loss": 0.0987, + "step": 91300 + }, + { + "epoch": 34.4, + "learning_rate": 1.3120060218291306e-05, + "loss": 0.1001, + "step": 91400 + }, + { + "epoch": 34.44, + "learning_rate": 1.3112532931878058e-05, + "loss": 0.099, + "step": 91500 + }, + { + "epoch": 34.47, + "learning_rate": 1.310500564546481e-05, + "loss": 0.0987, + "step": 91600 + }, + { + "epoch": 34.51, + "learning_rate": 1.3097478359051563e-05, + "loss": 0.0997, + "step": 91700 + }, + { + "epoch": 34.55, + "learning_rate": 1.3089951072638314e-05, + "loss": 0.1008, + "step": 91800 + }, + { + "epoch": 34.59, + "learning_rate": 1.3082423786225067e-05, + "loss": 0.0991, + "step": 91900 + }, + { + "epoch": 34.63, + "learning_rate": 1.307489649981182e-05, + "loss": 0.0995, + "step": 92000 + }, + { + "epoch": 34.66, + "learning_rate": 1.306736921339857e-05, + "loss": 0.1001, + "step": 92100 + }, + { + "epoch": 34.7, + "learning_rate": 1.3059841926985324e-05, + "loss": 0.0991, + "step": 92200 + }, + { + "epoch": 34.74, + "learning_rate": 1.3052314640572075e-05, + "loss": 0.0977, + "step": 92300 + }, + { + "epoch": 34.78, + "learning_rate": 1.3044787354158828e-05, + "loss": 0.0993, + "step": 92400 + }, + { + "epoch": 34.81, + "learning_rate": 1.3037260067745578e-05, + "loss": 0.1001, + "step": 92500 + }, + { + "epoch": 34.85, + "learning_rate": 1.3029732781332331e-05, + "loss": 0.1, + "step": 92600 + }, + { + "epoch": 34.89, + "learning_rate": 1.3022205494919082e-05, + "loss": 0.0998, + "step": 92700 + }, + { + "epoch": 34.93, + "learning_rate": 1.3014678208505836e-05, + "loss": 0.0996, + "step": 92800 + }, + { + "epoch": 34.96, + "learning_rate": 1.3007150922092587e-05, + "loss": 0.0998, + "step": 92900 + }, + { + "epoch": 35.0, + "eval_loss": 0.09930834919214249, + "eval_runtime": 45.6646, + "eval_samples_per_second": 164.241, + "eval_steps_per_second": 10.271, + "step": 92995 + }, + { + "epoch": 35.0, + "learning_rate": 1.299962363567934e-05, + "loss": 0.1003, + "step": 93000 + }, + { + "epoch": 35.04, + "learning_rate": 1.299209634926609e-05, + "loss": 0.0996, + "step": 93100 + }, + { + "epoch": 35.08, + "learning_rate": 1.2984569062852843e-05, + "loss": 0.0986, + "step": 93200 + }, + { + "epoch": 35.11, + "learning_rate": 1.2977041776439594e-05, + "loss": 0.0999, + "step": 93300 + }, + { + "epoch": 35.15, + "learning_rate": 1.2969514490026348e-05, + "loss": 0.1006, + "step": 93400 + }, + { + "epoch": 35.19, + "learning_rate": 1.2961987203613099e-05, + "loss": 0.0999, + "step": 93500 + }, + { + "epoch": 35.23, + "learning_rate": 1.2954459917199851e-05, + "loss": 0.0984, + "step": 93600 + }, + { + "epoch": 35.27, + "learning_rate": 1.2946932630786602e-05, + "loss": 0.0981, + "step": 93700 + }, + { + "epoch": 35.3, + "learning_rate": 1.2939405344373354e-05, + "loss": 0.1004, + "step": 93800 + }, + { + "epoch": 35.34, + "learning_rate": 1.2931878057960105e-05, + "loss": 0.0994, + "step": 93900 + }, + { + "epoch": 35.38, + "learning_rate": 1.292435077154686e-05, + "loss": 0.0984, + "step": 94000 + }, + { + "epoch": 35.42, + "learning_rate": 1.291682348513361e-05, + "loss": 0.1002, + "step": 94100 + }, + { + "epoch": 35.45, + "learning_rate": 1.2909296198720363e-05, + "loss": 0.0997, + "step": 94200 + }, + { + "epoch": 35.49, + "learning_rate": 1.2901768912307114e-05, + "loss": 0.0977, + "step": 94300 + }, + { + "epoch": 35.53, + "learning_rate": 1.2894241625893866e-05, + "loss": 0.0991, + "step": 94400 + }, + { + "epoch": 35.57, + "learning_rate": 1.2886714339480619e-05, + "loss": 0.0981, + "step": 94500 + }, + { + "epoch": 35.6, + "learning_rate": 1.287918705306737e-05, + "loss": 0.0998, + "step": 94600 + }, + { + "epoch": 35.64, + "learning_rate": 1.2871659766654124e-05, + "loss": 0.0999, + "step": 94700 + }, + { + "epoch": 35.68, + "learning_rate": 1.2864132480240874e-05, + "loss": 0.0989, + "step": 94800 + }, + { + "epoch": 35.72, + "learning_rate": 1.2856605193827627e-05, + "loss": 0.1, + "step": 94900 + }, + { + "epoch": 35.75, + "learning_rate": 1.2849077907414378e-05, + "loss": 0.1003, + "step": 95000 + }, + { + "epoch": 35.79, + "learning_rate": 1.284155062100113e-05, + "loss": 0.0997, + "step": 95100 + }, + { + "epoch": 35.83, + "learning_rate": 1.2834023334587881e-05, + "loss": 0.1002, + "step": 95200 + }, + { + "epoch": 35.87, + "learning_rate": 1.2826496048174635e-05, + "loss": 0.0986, + "step": 95300 + }, + { + "epoch": 35.91, + "learning_rate": 1.2818968761761386e-05, + "loss": 0.0999, + "step": 95400 + }, + { + "epoch": 35.94, + "learning_rate": 1.2811441475348139e-05, + "loss": 0.1005, + "step": 95500 + }, + { + "epoch": 35.98, + "learning_rate": 1.280391418893489e-05, + "loss": 0.0994, + "step": 95600 + }, + { + "epoch": 36.0, + "eval_loss": 0.09918170422315598, + "eval_runtime": 45.6422, + "eval_samples_per_second": 164.321, + "eval_steps_per_second": 10.276, + "step": 95652 + }, + { + "epoch": 36.02, + "learning_rate": 1.2796386902521642e-05, + "loss": 0.0979, + "step": 95700 + }, + { + "epoch": 36.06, + "learning_rate": 1.2788859616108393e-05, + "loss": 0.0981, + "step": 95800 + }, + { + "epoch": 36.09, + "learning_rate": 1.2781332329695147e-05, + "loss": 0.0992, + "step": 95900 + }, + { + "epoch": 36.13, + "learning_rate": 1.2773805043281898e-05, + "loss": 0.0991, + "step": 96000 + }, + { + "epoch": 36.17, + "learning_rate": 1.276627775686865e-05, + "loss": 0.1003, + "step": 96100 + }, + { + "epoch": 36.21, + "learning_rate": 1.2758750470455401e-05, + "loss": 0.0991, + "step": 96200 + }, + { + "epoch": 36.24, + "learning_rate": 1.2751223184042154e-05, + "loss": 0.0985, + "step": 96300 + }, + { + "epoch": 36.28, + "learning_rate": 1.2743695897628904e-05, + "loss": 0.0993, + "step": 96400 + }, + { + "epoch": 36.32, + "learning_rate": 1.2736168611215659e-05, + "loss": 0.0998, + "step": 96500 + }, + { + "epoch": 36.36, + "learning_rate": 1.272864132480241e-05, + "loss": 0.1014, + "step": 96600 + }, + { + "epoch": 36.39, + "learning_rate": 1.2721114038389162e-05, + "loss": 0.1005, + "step": 96700 + }, + { + "epoch": 36.43, + "learning_rate": 1.2713586751975913e-05, + "loss": 0.0992, + "step": 96800 + }, + { + "epoch": 36.47, + "learning_rate": 1.2706059465562665e-05, + "loss": 0.0981, + "step": 96900 + }, + { + "epoch": 36.51, + "learning_rate": 1.2698532179149418e-05, + "loss": 0.0988, + "step": 97000 + }, + { + "epoch": 36.54, + "learning_rate": 1.2691004892736169e-05, + "loss": 0.0978, + "step": 97100 + }, + { + "epoch": 36.58, + "learning_rate": 1.2683477606322923e-05, + "loss": 0.0989, + "step": 97200 + }, + { + "epoch": 36.62, + "learning_rate": 1.2675950319909674e-05, + "loss": 0.098, + "step": 97300 + }, + { + "epoch": 36.66, + "learning_rate": 1.2668423033496426e-05, + "loss": 0.0979, + "step": 97400 + }, + { + "epoch": 36.7, + "learning_rate": 1.2660895747083177e-05, + "loss": 0.0989, + "step": 97500 + }, + { + "epoch": 36.73, + "learning_rate": 1.265336846066993e-05, + "loss": 0.0993, + "step": 97600 + }, + { + "epoch": 36.77, + "learning_rate": 1.264584117425668e-05, + "loss": 0.0988, + "step": 97700 + }, + { + "epoch": 36.81, + "learning_rate": 1.2638313887843435e-05, + "loss": 0.0979, + "step": 97800 + }, + { + "epoch": 36.85, + "learning_rate": 1.2630786601430185e-05, + "loss": 0.0982, + "step": 97900 + }, + { + "epoch": 36.88, + "learning_rate": 1.2623259315016938e-05, + "loss": 0.0992, + "step": 98000 + }, + { + "epoch": 36.92, + "learning_rate": 1.2615732028603689e-05, + "loss": 0.1, + "step": 98100 + }, + { + "epoch": 36.96, + "learning_rate": 1.2608204742190441e-05, + "loss": 0.0977, + "step": 98200 + }, + { + "epoch": 37.0, + "learning_rate": 1.2600677455777192e-05, + "loss": 0.0966, + "step": 98300 + }, + { + "epoch": 37.0, + "eval_loss": 0.09910181164741516, + "eval_runtime": 45.3338, + "eval_samples_per_second": 165.439, + "eval_steps_per_second": 10.345, + "step": 98309 + }, + { + "epoch": 37.03, + "learning_rate": 1.2593150169363946e-05, + "loss": 0.0961, + "step": 98400 + }, + { + "epoch": 37.07, + "learning_rate": 1.2585622882950697e-05, + "loss": 0.0995, + "step": 98500 + }, + { + "epoch": 37.11, + "learning_rate": 1.257809559653745e-05, + "loss": 0.0996, + "step": 98600 + }, + { + "epoch": 37.15, + "learning_rate": 1.25705683101242e-05, + "loss": 0.0982, + "step": 98700 + }, + { + "epoch": 37.18, + "learning_rate": 1.2563041023710953e-05, + "loss": 0.0985, + "step": 98800 + }, + { + "epoch": 37.22, + "learning_rate": 1.2555513737297704e-05, + "loss": 0.0968, + "step": 98900 + }, + { + "epoch": 37.26, + "learning_rate": 1.2547986450884458e-05, + "loss": 0.0982, + "step": 99000 + }, + { + "epoch": 37.3, + "learning_rate": 1.2540459164471209e-05, + "loss": 0.0994, + "step": 99100 + }, + { + "epoch": 37.34, + "learning_rate": 1.2532931878057961e-05, + "loss": 0.1004, + "step": 99200 + }, + { + "epoch": 37.37, + "learning_rate": 1.2525404591644712e-05, + "loss": 0.0988, + "step": 99300 + }, + { + "epoch": 37.41, + "learning_rate": 1.2517877305231465e-05, + "loss": 0.099, + "step": 99400 + }, + { + "epoch": 37.45, + "learning_rate": 1.2510350018818219e-05, + "loss": 0.0991, + "step": 99500 + }, + { + "epoch": 37.49, + "learning_rate": 1.250282273240497e-05, + "loss": 0.0978, + "step": 99600 + }, + { + "epoch": 37.52, + "learning_rate": 1.2495295445991722e-05, + "loss": 0.0981, + "step": 99700 + }, + { + "epoch": 37.56, + "learning_rate": 1.2487768159578473e-05, + "loss": 0.0989, + "step": 99800 + }, + { + "epoch": 37.6, + "learning_rate": 1.2480240873165226e-05, + "loss": 0.0982, + "step": 99900 + }, + { + "epoch": 37.64, + "learning_rate": 1.2472713586751976e-05, + "loss": 0.1001, + "step": 100000 + }, + { + "epoch": 37.67, + "learning_rate": 1.2465186300338729e-05, + "loss": 0.0972, + "step": 100100 + }, + { + "epoch": 37.71, + "learning_rate": 1.245765901392548e-05, + "loss": 0.0993, + "step": 100200 + }, + { + "epoch": 37.75, + "learning_rate": 1.2450131727512234e-05, + "loss": 0.0997, + "step": 100300 + }, + { + "epoch": 37.79, + "learning_rate": 1.2442604441098985e-05, + "loss": 0.0978, + "step": 100400 + }, + { + "epoch": 37.82, + "learning_rate": 1.2435077154685737e-05, + "loss": 0.0992, + "step": 100500 + }, + { + "epoch": 37.86, + "learning_rate": 1.2427549868272488e-05, + "loss": 0.0992, + "step": 100600 + }, + { + "epoch": 37.9, + "learning_rate": 1.242002258185924e-05, + "loss": 0.0984, + "step": 100700 + }, + { + "epoch": 37.94, + "learning_rate": 1.2412495295445991e-05, + "loss": 0.0983, + "step": 100800 + }, + { + "epoch": 37.98, + "learning_rate": 1.2404968009032746e-05, + "loss": 0.0997, + "step": 100900 + }, + { + "epoch": 38.0, + "eval_loss": 0.09699103981256485, + "eval_runtime": 45.3352, + "eval_samples_per_second": 165.435, + "eval_steps_per_second": 10.345, + "step": 100966 + }, + { + "epoch": 38.01, + "learning_rate": 1.2397440722619496e-05, + "loss": 0.0983, + "step": 101000 + }, + { + "epoch": 38.05, + "learning_rate": 1.2389913436206249e-05, + "loss": 0.0984, + "step": 101100 + }, + { + "epoch": 38.09, + "learning_rate": 1.2382386149793e-05, + "loss": 0.0971, + "step": 101200 + }, + { + "epoch": 38.13, + "learning_rate": 1.2374858863379752e-05, + "loss": 0.0979, + "step": 101300 + }, + { + "epoch": 38.16, + "learning_rate": 1.2367331576966503e-05, + "loss": 0.0992, + "step": 101400 + }, + { + "epoch": 38.2, + "learning_rate": 1.2359804290553257e-05, + "loss": 0.0989, + "step": 101500 + }, + { + "epoch": 38.24, + "learning_rate": 1.2352277004140008e-05, + "loss": 0.0988, + "step": 101600 + }, + { + "epoch": 38.28, + "learning_rate": 1.234474971772676e-05, + "loss": 0.098, + "step": 101700 + }, + { + "epoch": 38.31, + "learning_rate": 1.2337222431313511e-05, + "loss": 0.0961, + "step": 101800 + }, + { + "epoch": 38.35, + "learning_rate": 1.2329695144900264e-05, + "loss": 0.0978, + "step": 101900 + }, + { + "epoch": 38.39, + "learning_rate": 1.2322167858487018e-05, + "loss": 0.1003, + "step": 102000 + }, + { + "epoch": 38.43, + "learning_rate": 1.2314640572073769e-05, + "loss": 0.0989, + "step": 102100 + }, + { + "epoch": 38.46, + "learning_rate": 1.2307113285660521e-05, + "loss": 0.0984, + "step": 102200 + }, + { + "epoch": 38.5, + "learning_rate": 1.2299585999247272e-05, + "loss": 0.0985, + "step": 102300 + }, + { + "epoch": 38.54, + "learning_rate": 1.2292058712834025e-05, + "loss": 0.1008, + "step": 102400 + }, + { + "epoch": 38.58, + "learning_rate": 1.2284531426420776e-05, + "loss": 0.0969, + "step": 102500 + }, + { + "epoch": 38.61, + "learning_rate": 1.227700414000753e-05, + "loss": 0.0981, + "step": 102600 + }, + { + "epoch": 38.65, + "learning_rate": 1.226947685359428e-05, + "loss": 0.0979, + "step": 102700 + }, + { + "epoch": 38.69, + "learning_rate": 1.2261949567181033e-05, + "loss": 0.0994, + "step": 102800 + }, + { + "epoch": 38.73, + "learning_rate": 1.2254422280767784e-05, + "loss": 0.0992, + "step": 102900 + }, + { + "epoch": 38.77, + "learning_rate": 1.2246894994354536e-05, + "loss": 0.0981, + "step": 103000 + }, + { + "epoch": 38.8, + "learning_rate": 1.2239367707941287e-05, + "loss": 0.0991, + "step": 103100 + }, + { + "epoch": 38.84, + "learning_rate": 1.223184042152804e-05, + "loss": 0.0977, + "step": 103200 + }, + { + "epoch": 38.88, + "learning_rate": 1.222431313511479e-05, + "loss": 0.0979, + "step": 103300 + }, + { + "epoch": 38.92, + "learning_rate": 1.2216785848701545e-05, + "loss": 0.0976, + "step": 103400 + }, + { + "epoch": 38.95, + "learning_rate": 1.2209258562288296e-05, + "loss": 0.0996, + "step": 103500 + }, + { + "epoch": 38.99, + "learning_rate": 1.2201731275875048e-05, + "loss": 0.0991, + "step": 103600 + }, + { + "epoch": 39.0, + "eval_loss": 0.09791671484708786, + "eval_runtime": 45.4451, + "eval_samples_per_second": 165.034, + "eval_steps_per_second": 10.32, + "step": 103623 + }, + { + "epoch": 39.03, + "learning_rate": 1.2194203989461799e-05, + "loss": 0.098, + "step": 103700 + }, + { + "epoch": 39.07, + "learning_rate": 1.2186676703048552e-05, + "loss": 0.0965, + "step": 103800 + }, + { + "epoch": 39.1, + "learning_rate": 1.2179149416635302e-05, + "loss": 0.0979, + "step": 103900 + }, + { + "epoch": 39.14, + "learning_rate": 1.2171622130222057e-05, + "loss": 0.0978, + "step": 104000 + }, + { + "epoch": 39.18, + "learning_rate": 1.2164094843808807e-05, + "loss": 0.0996, + "step": 104100 + }, + { + "epoch": 39.22, + "learning_rate": 1.215656755739556e-05, + "loss": 0.0995, + "step": 104200 + }, + { + "epoch": 39.25, + "learning_rate": 1.2149040270982312e-05, + "loss": 0.0988, + "step": 104300 + }, + { + "epoch": 39.29, + "learning_rate": 1.2141512984569063e-05, + "loss": 0.0975, + "step": 104400 + }, + { + "epoch": 39.33, + "learning_rate": 1.2133985698155817e-05, + "loss": 0.098, + "step": 104500 + }, + { + "epoch": 39.37, + "learning_rate": 1.2126458411742568e-05, + "loss": 0.098, + "step": 104600 + }, + { + "epoch": 39.41, + "learning_rate": 1.211893112532932e-05, + "loss": 0.098, + "step": 104700 + }, + { + "epoch": 39.44, + "learning_rate": 1.2111403838916072e-05, + "loss": 0.0995, + "step": 104800 + }, + { + "epoch": 39.48, + "learning_rate": 1.2103876552502824e-05, + "loss": 0.0977, + "step": 104900 + }, + { + "epoch": 39.52, + "learning_rate": 1.2096349266089575e-05, + "loss": 0.0988, + "step": 105000 + }, + { + "epoch": 39.56, + "learning_rate": 1.2088821979676329e-05, + "loss": 0.0986, + "step": 105100 + }, + { + "epoch": 39.59, + "learning_rate": 1.208129469326308e-05, + "loss": 0.0987, + "step": 105200 + }, + { + "epoch": 39.63, + "learning_rate": 1.2073767406849832e-05, + "loss": 0.0979, + "step": 105300 + }, + { + "epoch": 39.67, + "learning_rate": 1.2066240120436583e-05, + "loss": 0.0963, + "step": 105400 + }, + { + "epoch": 39.71, + "learning_rate": 1.2058712834023336e-05, + "loss": 0.0978, + "step": 105500 + }, + { + "epoch": 39.74, + "learning_rate": 1.2051185547610087e-05, + "loss": 0.0989, + "step": 105600 + }, + { + "epoch": 39.78, + "learning_rate": 1.204365826119684e-05, + "loss": 0.0971, + "step": 105700 + }, + { + "epoch": 39.82, + "learning_rate": 1.2036130974783592e-05, + "loss": 0.0985, + "step": 105800 + }, + { + "epoch": 39.86, + "learning_rate": 1.2028603688370344e-05, + "loss": 0.0964, + "step": 105900 + }, + { + "epoch": 39.89, + "learning_rate": 1.2021076401957095e-05, + "loss": 0.0968, + "step": 106000 + }, + { + "epoch": 39.93, + "learning_rate": 1.2013549115543847e-05, + "loss": 0.0965, + "step": 106100 + }, + { + "epoch": 39.97, + "learning_rate": 1.2006021829130598e-05, + "loss": 0.099, + "step": 106200 + }, + { + "epoch": 40.0, + "eval_loss": 0.09832270443439484, + "eval_runtime": 45.1549, + "eval_samples_per_second": 166.095, + "eval_steps_per_second": 10.386, + "step": 106280 + }, + { + "epoch": 40.01, + "learning_rate": 1.199849454271735e-05, + "loss": 0.098, + "step": 106300 + }, + { + "epoch": 40.05, + "learning_rate": 1.1990967256304102e-05, + "loss": 0.098, + "step": 106400 + }, + { + "epoch": 40.08, + "learning_rate": 1.1983439969890856e-05, + "loss": 0.0977, + "step": 106500 + }, + { + "epoch": 40.12, + "learning_rate": 1.1975912683477607e-05, + "loss": 0.0994, + "step": 106600 + }, + { + "epoch": 40.16, + "learning_rate": 1.196838539706436e-05, + "loss": 0.0976, + "step": 106700 + }, + { + "epoch": 40.2, + "learning_rate": 1.1960858110651112e-05, + "loss": 0.098, + "step": 106800 + }, + { + "epoch": 40.23, + "learning_rate": 1.1953330824237862e-05, + "loss": 0.0978, + "step": 106900 + }, + { + "epoch": 40.27, + "learning_rate": 1.1945803537824617e-05, + "loss": 0.0981, + "step": 107000 + }, + { + "epoch": 40.31, + "learning_rate": 1.1938276251411368e-05, + "loss": 0.0971, + "step": 107100 + }, + { + "epoch": 40.35, + "learning_rate": 1.193074896499812e-05, + "loss": 0.0985, + "step": 107200 + }, + { + "epoch": 40.38, + "learning_rate": 1.1923221678584871e-05, + "loss": 0.0996, + "step": 107300 + }, + { + "epoch": 40.42, + "learning_rate": 1.1915694392171623e-05, + "loss": 0.0981, + "step": 107400 + }, + { + "epoch": 40.46, + "learning_rate": 1.1908167105758374e-05, + "loss": 0.0996, + "step": 107500 + }, + { + "epoch": 40.5, + "learning_rate": 1.1900639819345128e-05, + "loss": 0.0989, + "step": 107600 + }, + { + "epoch": 40.53, + "learning_rate": 1.189311253293188e-05, + "loss": 0.0992, + "step": 107700 + }, + { + "epoch": 40.57, + "learning_rate": 1.1885585246518632e-05, + "loss": 0.0974, + "step": 107800 + }, + { + "epoch": 40.61, + "learning_rate": 1.1878057960105383e-05, + "loss": 0.0984, + "step": 107900 + }, + { + "epoch": 40.65, + "learning_rate": 1.1870530673692135e-05, + "loss": 0.0984, + "step": 108000 + }, + { + "epoch": 40.68, + "learning_rate": 1.1863003387278886e-05, + "loss": 0.099, + "step": 108100 + }, + { + "epoch": 40.72, + "learning_rate": 1.185547610086564e-05, + "loss": 0.0984, + "step": 108200 + }, + { + "epoch": 40.76, + "learning_rate": 1.1847948814452391e-05, + "loss": 0.0966, + "step": 108300 + }, + { + "epoch": 40.8, + "learning_rate": 1.1840421528039143e-05, + "loss": 0.1, + "step": 108400 + }, + { + "epoch": 40.84, + "learning_rate": 1.1832894241625894e-05, + "loss": 0.0979, + "step": 108500 + }, + { + "epoch": 40.87, + "learning_rate": 1.1825366955212647e-05, + "loss": 0.0981, + "step": 108600 + }, + { + "epoch": 40.91, + "learning_rate": 1.1817839668799398e-05, + "loss": 0.0973, + "step": 108700 + }, + { + "epoch": 40.95, + "learning_rate": 1.1810312382386152e-05, + "loss": 0.0978, + "step": 108800 + }, + { + "epoch": 40.99, + "learning_rate": 1.1802785095972903e-05, + "loss": 0.0974, + "step": 108900 + }, + { + "epoch": 41.0, + "eval_loss": 0.09795571118593216, + "eval_runtime": 45.3802, + "eval_samples_per_second": 165.27, + "eval_steps_per_second": 10.335, + "step": 108937 + }, + { + "epoch": 41.02, + "learning_rate": 1.1795257809559655e-05, + "loss": 0.0961, + "step": 109000 + }, + { + "epoch": 41.06, + "learning_rate": 1.1787730523146406e-05, + "loss": 0.0989, + "step": 109100 + }, + { + "epoch": 41.1, + "learning_rate": 1.1780203236733158e-05, + "loss": 0.0988, + "step": 109200 + }, + { + "epoch": 41.14, + "learning_rate": 1.1772675950319911e-05, + "loss": 0.0979, + "step": 109300 + }, + { + "epoch": 41.17, + "learning_rate": 1.1765148663906662e-05, + "loss": 0.0971, + "step": 109400 + }, + { + "epoch": 41.21, + "learning_rate": 1.1757621377493416e-05, + "loss": 0.0965, + "step": 109500 + }, + { + "epoch": 41.25, + "learning_rate": 1.1750094091080167e-05, + "loss": 0.0982, + "step": 109600 + }, + { + "epoch": 41.29, + "learning_rate": 1.174256680466692e-05, + "loss": 0.0974, + "step": 109700 + }, + { + "epoch": 41.32, + "learning_rate": 1.173503951825367e-05, + "loss": 0.097, + "step": 109800 + }, + { + "epoch": 41.36, + "learning_rate": 1.1727512231840423e-05, + "loss": 0.0974, + "step": 109900 + }, + { + "epoch": 41.4, + "learning_rate": 1.1719984945427173e-05, + "loss": 0.0969, + "step": 110000 + }, + { + "epoch": 41.44, + "learning_rate": 1.1712457659013928e-05, + "loss": 0.0983, + "step": 110100 + }, + { + "epoch": 41.48, + "learning_rate": 1.1704930372600679e-05, + "loss": 0.0978, + "step": 110200 + }, + { + "epoch": 41.51, + "learning_rate": 1.1697403086187431e-05, + "loss": 0.0962, + "step": 110300 + }, + { + "epoch": 41.55, + "learning_rate": 1.1689875799774182e-05, + "loss": 0.0985, + "step": 110400 + }, + { + "epoch": 41.59, + "learning_rate": 1.1682348513360934e-05, + "loss": 0.0992, + "step": 110500 + }, + { + "epoch": 41.63, + "learning_rate": 1.1674821226947685e-05, + "loss": 0.0972, + "step": 110600 + }, + { + "epoch": 41.66, + "learning_rate": 1.166729394053444e-05, + "loss": 0.098, + "step": 110700 + }, + { + "epoch": 41.7, + "learning_rate": 1.165976665412119e-05, + "loss": 0.0991, + "step": 110800 + }, + { + "epoch": 41.74, + "learning_rate": 1.1652239367707943e-05, + "loss": 0.0963, + "step": 110900 + }, + { + "epoch": 41.78, + "learning_rate": 1.1644712081294694e-05, + "loss": 0.0983, + "step": 111000 + }, + { + "epoch": 41.81, + "learning_rate": 1.1637184794881446e-05, + "loss": 0.0988, + "step": 111100 + }, + { + "epoch": 41.85, + "learning_rate": 1.1629657508468197e-05, + "loss": 0.0977, + "step": 111200 + }, + { + "epoch": 41.89, + "learning_rate": 1.1622130222054951e-05, + "loss": 0.0984, + "step": 111300 + }, + { + "epoch": 41.93, + "learning_rate": 1.1614602935641702e-05, + "loss": 0.0974, + "step": 111400 + }, + { + "epoch": 41.96, + "learning_rate": 1.1607075649228454e-05, + "loss": 0.0974, + "step": 111500 + }, + { + "epoch": 42.0, + "eval_loss": 0.0971272811293602, + "eval_runtime": 45.4607, + "eval_samples_per_second": 164.978, + "eval_steps_per_second": 10.317, + "step": 111594 + }, + { + "epoch": 42.0, + "learning_rate": 1.1599548362815205e-05, + "loss": 0.0983, + "step": 111600 + }, + { + "epoch": 42.04, + "learning_rate": 1.1592021076401958e-05, + "loss": 0.0968, + "step": 111700 + }, + { + "epoch": 42.08, + "learning_rate": 1.1584493789988712e-05, + "loss": 0.0984, + "step": 111800 + }, + { + "epoch": 42.12, + "learning_rate": 1.1576966503575463e-05, + "loss": 0.0991, + "step": 111900 + }, + { + "epoch": 42.15, + "learning_rate": 1.1569439217162215e-05, + "loss": 0.0965, + "step": 112000 + }, + { + "epoch": 42.19, + "learning_rate": 1.1561911930748966e-05, + "loss": 0.098, + "step": 112100 + }, + { + "epoch": 42.23, + "learning_rate": 1.1554384644335719e-05, + "loss": 0.0979, + "step": 112200 + }, + { + "epoch": 42.27, + "learning_rate": 1.154685735792247e-05, + "loss": 0.0971, + "step": 112300 + }, + { + "epoch": 42.3, + "learning_rate": 1.1539330071509222e-05, + "loss": 0.0974, + "step": 112400 + }, + { + "epoch": 42.34, + "learning_rate": 1.1531802785095973e-05, + "loss": 0.0963, + "step": 112500 + }, + { + "epoch": 42.38, + "learning_rate": 1.1524275498682727e-05, + "loss": 0.097, + "step": 112600 + }, + { + "epoch": 42.42, + "learning_rate": 1.1516748212269478e-05, + "loss": 0.1001, + "step": 112700 + }, + { + "epoch": 42.45, + "learning_rate": 1.150922092585623e-05, + "loss": 0.0972, + "step": 112800 + }, + { + "epoch": 42.49, + "learning_rate": 1.1501693639442981e-05, + "loss": 0.0976, + "step": 112900 + }, + { + "epoch": 42.53, + "learning_rate": 1.1494166353029734e-05, + "loss": 0.0963, + "step": 113000 + }, + { + "epoch": 42.57, + "learning_rate": 1.1486639066616484e-05, + "loss": 0.0965, + "step": 113100 + }, + { + "epoch": 42.6, + "learning_rate": 1.1479111780203239e-05, + "loss": 0.0978, + "step": 113200 + }, + { + "epoch": 42.64, + "learning_rate": 1.147158449378999e-05, + "loss": 0.0996, + "step": 113300 + }, + { + "epoch": 42.68, + "learning_rate": 1.1464057207376742e-05, + "loss": 0.0965, + "step": 113400 + }, + { + "epoch": 42.72, + "learning_rate": 1.1456529920963493e-05, + "loss": 0.0964, + "step": 113500 + }, + { + "epoch": 42.75, + "learning_rate": 1.1449002634550245e-05, + "loss": 0.0979, + "step": 113600 + }, + { + "epoch": 42.79, + "learning_rate": 1.1441475348136996e-05, + "loss": 0.0982, + "step": 113700 + }, + { + "epoch": 42.83, + "learning_rate": 1.143394806172375e-05, + "loss": 0.0975, + "step": 113800 + }, + { + "epoch": 42.87, + "learning_rate": 1.1426420775310501e-05, + "loss": 0.0956, + "step": 113900 + }, + { + "epoch": 42.91, + "learning_rate": 1.1418893488897254e-05, + "loss": 0.0964, + "step": 114000 + }, + { + "epoch": 42.94, + "learning_rate": 1.1411366202484005e-05, + "loss": 0.0984, + "step": 114100 + }, + { + "epoch": 42.98, + "learning_rate": 1.1403838916070757e-05, + "loss": 0.0972, + "step": 114200 + }, + { + "epoch": 43.0, + "eval_loss": 0.09703872352838516, + "eval_runtime": 45.2306, + "eval_samples_per_second": 165.817, + "eval_steps_per_second": 10.369, + "step": 114251 + }, + { + "epoch": 43.02, + "learning_rate": 1.1396311629657511e-05, + "loss": 0.0971, + "step": 114300 + }, + { + "epoch": 43.06, + "learning_rate": 1.1388784343244262e-05, + "loss": 0.0988, + "step": 114400 + }, + { + "epoch": 43.09, + "learning_rate": 1.1381257056831015e-05, + "loss": 0.0966, + "step": 114500 + }, + { + "epoch": 43.13, + "learning_rate": 1.1373729770417765e-05, + "loss": 0.0964, + "step": 114600 + }, + { + "epoch": 43.17, + "learning_rate": 1.1366202484004518e-05, + "loss": 0.0984, + "step": 114700 + }, + { + "epoch": 43.21, + "learning_rate": 1.1358675197591269e-05, + "loss": 0.0978, + "step": 114800 + }, + { + "epoch": 43.24, + "learning_rate": 1.1351147911178021e-05, + "loss": 0.0975, + "step": 114900 + }, + { + "epoch": 43.28, + "learning_rate": 1.1343620624764772e-05, + "loss": 0.0976, + "step": 115000 + }, + { + "epoch": 43.32, + "learning_rate": 1.1336093338351526e-05, + "loss": 0.0965, + "step": 115100 + }, + { + "epoch": 43.36, + "learning_rate": 1.1328566051938277e-05, + "loss": 0.0961, + "step": 115200 + }, + { + "epoch": 43.39, + "learning_rate": 1.132103876552503e-05, + "loss": 0.0964, + "step": 115300 + }, + { + "epoch": 43.43, + "learning_rate": 1.131351147911178e-05, + "loss": 0.0969, + "step": 115400 + }, + { + "epoch": 43.47, + "learning_rate": 1.1305984192698533e-05, + "loss": 0.097, + "step": 115500 + }, + { + "epoch": 43.51, + "learning_rate": 1.1298456906285284e-05, + "loss": 0.0977, + "step": 115600 + }, + { + "epoch": 43.55, + "learning_rate": 1.1290929619872038e-05, + "loss": 0.0981, + "step": 115700 + }, + { + "epoch": 43.58, + "learning_rate": 1.1283402333458789e-05, + "loss": 0.0973, + "step": 115800 + }, + { + "epoch": 43.62, + "learning_rate": 1.1275875047045541e-05, + "loss": 0.0968, + "step": 115900 + }, + { + "epoch": 43.66, + "learning_rate": 1.1268347760632292e-05, + "loss": 0.0962, + "step": 116000 + }, + { + "epoch": 43.7, + "learning_rate": 1.1260820474219045e-05, + "loss": 0.098, + "step": 116100 + }, + { + "epoch": 43.73, + "learning_rate": 1.1253293187805795e-05, + "loss": 0.0974, + "step": 116200 + }, + { + "epoch": 43.77, + "learning_rate": 1.124576590139255e-05, + "loss": 0.0957, + "step": 116300 + }, + { + "epoch": 43.81, + "learning_rate": 1.12382386149793e-05, + "loss": 0.0974, + "step": 116400 + }, + { + "epoch": 43.85, + "learning_rate": 1.1230711328566053e-05, + "loss": 0.096, + "step": 116500 + }, + { + "epoch": 43.88, + "learning_rate": 1.1223184042152804e-05, + "loss": 0.0979, + "step": 116600 + }, + { + "epoch": 43.92, + "learning_rate": 1.1215656755739556e-05, + "loss": 0.0958, + "step": 116700 + }, + { + "epoch": 43.96, + "learning_rate": 1.120812946932631e-05, + "loss": 0.097, + "step": 116800 + }, + { + "epoch": 44.0, + "learning_rate": 1.1200602182913061e-05, + "loss": 0.0991, + "step": 116900 + }, + { + "epoch": 44.0, + "eval_loss": 0.09703505784273148, + "eval_runtime": 45.5627, + "eval_samples_per_second": 164.608, + "eval_steps_per_second": 10.294, + "step": 116908 + }, + { + "epoch": 44.03, + "learning_rate": 1.1193074896499814e-05, + "loss": 0.0984, + "step": 117000 + }, + { + "epoch": 44.07, + "learning_rate": 1.1185547610086565e-05, + "loss": 0.0965, + "step": 117100 + }, + { + "epoch": 44.11, + "learning_rate": 1.1178020323673317e-05, + "loss": 0.0957, + "step": 117200 + }, + { + "epoch": 44.15, + "learning_rate": 1.1170493037260068e-05, + "loss": 0.0971, + "step": 117300 + }, + { + "epoch": 44.19, + "learning_rate": 1.1162965750846822e-05, + "loss": 0.096, + "step": 117400 + }, + { + "epoch": 44.22, + "learning_rate": 1.1155438464433573e-05, + "loss": 0.0959, + "step": 117500 + }, + { + "epoch": 44.26, + "learning_rate": 1.1147911178020326e-05, + "loss": 0.0965, + "step": 117600 + }, + { + "epoch": 44.3, + "learning_rate": 1.1140383891607076e-05, + "loss": 0.0987, + "step": 117700 + }, + { + "epoch": 44.34, + "learning_rate": 1.1132856605193829e-05, + "loss": 0.0972, + "step": 117800 + }, + { + "epoch": 44.37, + "learning_rate": 1.112532931878058e-05, + "loss": 0.0962, + "step": 117900 + }, + { + "epoch": 44.41, + "learning_rate": 1.1117802032367332e-05, + "loss": 0.0956, + "step": 118000 + }, + { + "epoch": 44.45, + "learning_rate": 1.1110274745954083e-05, + "loss": 0.0974, + "step": 118100 + }, + { + "epoch": 44.49, + "learning_rate": 1.1102747459540837e-05, + "loss": 0.0973, + "step": 118200 + }, + { + "epoch": 44.52, + "learning_rate": 1.1095220173127588e-05, + "loss": 0.0952, + "step": 118300 + }, + { + "epoch": 44.56, + "learning_rate": 1.108769288671434e-05, + "loss": 0.0968, + "step": 118400 + }, + { + "epoch": 44.6, + "learning_rate": 1.1080165600301091e-05, + "loss": 0.0971, + "step": 118500 + }, + { + "epoch": 44.64, + "learning_rate": 1.1072638313887844e-05, + "loss": 0.1, + "step": 118600 + }, + { + "epoch": 44.67, + "learning_rate": 1.1065111027474595e-05, + "loss": 0.0966, + "step": 118700 + }, + { + "epoch": 44.71, + "learning_rate": 1.1057583741061349e-05, + "loss": 0.0973, + "step": 118800 + }, + { + "epoch": 44.75, + "learning_rate": 1.10500564546481e-05, + "loss": 0.0957, + "step": 118900 + }, + { + "epoch": 44.79, + "learning_rate": 1.1042529168234852e-05, + "loss": 0.0964, + "step": 119000 + }, + { + "epoch": 44.82, + "learning_rate": 1.1035001881821603e-05, + "loss": 0.0954, + "step": 119100 + }, + { + "epoch": 44.86, + "learning_rate": 1.1027474595408356e-05, + "loss": 0.0971, + "step": 119200 + }, + { + "epoch": 44.9, + "learning_rate": 1.101994730899511e-05, + "loss": 0.0973, + "step": 119300 + }, + { + "epoch": 44.94, + "learning_rate": 1.101242002258186e-05, + "loss": 0.0975, + "step": 119400 + }, + { + "epoch": 44.98, + "learning_rate": 1.1004892736168613e-05, + "loss": 0.0979, + "step": 119500 + }, + { + "epoch": 45.0, + "eval_loss": 0.09719178825616837, + "eval_runtime": 45.3083, + "eval_samples_per_second": 165.532, + "eval_steps_per_second": 10.351, + "step": 119565 + }, + { + "epoch": 45.01, + "learning_rate": 1.0997365449755364e-05, + "loss": 0.0972, + "step": 119600 + }, + { + "epoch": 45.05, + "learning_rate": 1.0989838163342116e-05, + "loss": 0.0977, + "step": 119700 + }, + { + "epoch": 45.09, + "learning_rate": 1.0982310876928867e-05, + "loss": 0.0973, + "step": 119800 + }, + { + "epoch": 45.13, + "learning_rate": 1.0974783590515621e-05, + "loss": 0.0954, + "step": 119900 + }, + { + "epoch": 45.16, + "learning_rate": 1.0967256304102372e-05, + "loss": 0.0969, + "step": 120000 + }, + { + "epoch": 45.2, + "learning_rate": 1.0959729017689125e-05, + "loss": 0.0958, + "step": 120100 + }, + { + "epoch": 45.24, + "learning_rate": 1.0952201731275876e-05, + "loss": 0.0964, + "step": 120200 + }, + { + "epoch": 45.28, + "learning_rate": 1.0944674444862628e-05, + "loss": 0.0961, + "step": 120300 + }, + { + "epoch": 45.31, + "learning_rate": 1.0937147158449379e-05, + "loss": 0.0968, + "step": 120400 + }, + { + "epoch": 45.35, + "learning_rate": 1.0929619872036133e-05, + "loss": 0.0973, + "step": 120500 + }, + { + "epoch": 45.39, + "learning_rate": 1.0922092585622884e-05, + "loss": 0.0983, + "step": 120600 + }, + { + "epoch": 45.43, + "learning_rate": 1.0914565299209637e-05, + "loss": 0.0979, + "step": 120700 + }, + { + "epoch": 45.46, + "learning_rate": 1.0907038012796387e-05, + "loss": 0.0959, + "step": 120800 + }, + { + "epoch": 45.5, + "learning_rate": 1.089951072638314e-05, + "loss": 0.0967, + "step": 120900 + }, + { + "epoch": 45.54, + "learning_rate": 1.089198343996989e-05, + "loss": 0.0978, + "step": 121000 + }, + { + "epoch": 45.58, + "learning_rate": 1.0884456153556643e-05, + "loss": 0.0966, + "step": 121100 + }, + { + "epoch": 45.62, + "learning_rate": 1.0876928867143394e-05, + "loss": 0.0967, + "step": 121200 + }, + { + "epoch": 45.65, + "learning_rate": 1.0869401580730148e-05, + "loss": 0.0981, + "step": 121300 + }, + { + "epoch": 45.69, + "learning_rate": 1.0861874294316899e-05, + "loss": 0.0955, + "step": 121400 + }, + { + "epoch": 45.73, + "learning_rate": 1.0854347007903652e-05, + "loss": 0.0981, + "step": 121500 + }, + { + "epoch": 45.77, + "learning_rate": 1.0846819721490402e-05, + "loss": 0.0963, + "step": 121600 + }, + { + "epoch": 45.8, + "learning_rate": 1.0839292435077155e-05, + "loss": 0.096, + "step": 121700 + }, + { + "epoch": 45.84, + "learning_rate": 1.0831765148663909e-05, + "loss": 0.0969, + "step": 121800 + }, + { + "epoch": 45.88, + "learning_rate": 1.082423786225066e-05, + "loss": 0.0959, + "step": 121900 + }, + { + "epoch": 45.92, + "learning_rate": 1.0816710575837412e-05, + "loss": 0.0969, + "step": 122000 + }, + { + "epoch": 45.95, + "learning_rate": 1.0809183289424163e-05, + "loss": 0.0968, + "step": 122100 + }, + { + "epoch": 45.99, + "learning_rate": 1.0801656003010916e-05, + "loss": 0.097, + "step": 122200 + }, + { + "epoch": 46.0, + "eval_loss": 0.09704454988241196, + "eval_runtime": 45.4128, + "eval_samples_per_second": 165.152, + "eval_steps_per_second": 10.327, + "step": 122222 + }, + { + "epoch": 46.03, + "learning_rate": 1.0794128716597667e-05, + "loss": 0.0977, + "step": 122300 + }, + { + "epoch": 46.07, + "learning_rate": 1.078660143018442e-05, + "loss": 0.0966, + "step": 122400 + }, + { + "epoch": 46.1, + "learning_rate": 1.0779074143771172e-05, + "loss": 0.0972, + "step": 122500 + }, + { + "epoch": 46.14, + "learning_rate": 1.0771546857357924e-05, + "loss": 0.0965, + "step": 122600 + }, + { + "epoch": 46.18, + "learning_rate": 1.0764019570944675e-05, + "loss": 0.0959, + "step": 122700 + }, + { + "epoch": 46.22, + "learning_rate": 1.0756492284531427e-05, + "loss": 0.0955, + "step": 122800 + }, + { + "epoch": 46.26, + "learning_rate": 1.0748964998118178e-05, + "loss": 0.0963, + "step": 122900 + }, + { + "epoch": 46.29, + "learning_rate": 1.0741437711704932e-05, + "loss": 0.0965, + "step": 123000 + }, + { + "epoch": 46.33, + "learning_rate": 1.0733910425291683e-05, + "loss": 0.0966, + "step": 123100 + }, + { + "epoch": 46.37, + "learning_rate": 1.0726383138878436e-05, + "loss": 0.0959, + "step": 123200 + }, + { + "epoch": 46.41, + "learning_rate": 1.0718855852465187e-05, + "loss": 0.0975, + "step": 123300 + }, + { + "epoch": 46.44, + "learning_rate": 1.0711328566051939e-05, + "loss": 0.0957, + "step": 123400 + }, + { + "epoch": 46.48, + "learning_rate": 1.070380127963869e-05, + "loss": 0.0966, + "step": 123500 + }, + { + "epoch": 46.52, + "learning_rate": 1.0696273993225444e-05, + "loss": 0.0973, + "step": 123600 + }, + { + "epoch": 46.56, + "learning_rate": 1.0688746706812195e-05, + "loss": 0.0979, + "step": 123700 + }, + { + "epoch": 46.59, + "learning_rate": 1.0681219420398947e-05, + "loss": 0.0983, + "step": 123800 + }, + { + "epoch": 46.63, + "learning_rate": 1.0673692133985698e-05, + "loss": 0.0969, + "step": 123900 + }, + { + "epoch": 46.67, + "learning_rate": 1.066616484757245e-05, + "loss": 0.097, + "step": 124000 + }, + { + "epoch": 46.71, + "learning_rate": 1.0658637561159202e-05, + "loss": 0.0969, + "step": 124100 + }, + { + "epoch": 46.74, + "learning_rate": 1.0651110274745954e-05, + "loss": 0.0967, + "step": 124200 + }, + { + "epoch": 46.78, + "learning_rate": 1.0643582988332708e-05, + "loss": 0.0955, + "step": 124300 + }, + { + "epoch": 46.82, + "learning_rate": 1.063605570191946e-05, + "loss": 0.0967, + "step": 124400 + }, + { + "epoch": 46.86, + "learning_rate": 1.0628528415506212e-05, + "loss": 0.0972, + "step": 124500 + }, + { + "epoch": 46.89, + "learning_rate": 1.0621001129092963e-05, + "loss": 0.0975, + "step": 124600 + }, + { + "epoch": 46.93, + "learning_rate": 1.0613473842679715e-05, + "loss": 0.0956, + "step": 124700 + }, + { + "epoch": 46.97, + "learning_rate": 1.0605946556266466e-05, + "loss": 0.0936, + "step": 124800 + }, + { + "epoch": 47.0, + "eval_loss": 0.096713587641716, + "eval_runtime": 45.4283, + "eval_samples_per_second": 165.095, + "eval_steps_per_second": 10.324, + "step": 124879 + }, + { + "epoch": 47.01, + "learning_rate": 1.059841926985322e-05, + "loss": 0.0966, + "step": 124900 + }, + { + "epoch": 47.05, + "learning_rate": 1.0590891983439971e-05, + "loss": 0.0958, + "step": 125000 + }, + { + "epoch": 47.08, + "learning_rate": 1.0583364697026723e-05, + "loss": 0.0961, + "step": 125100 + }, + { + "epoch": 47.12, + "learning_rate": 1.0575837410613474e-05, + "loss": 0.0955, + "step": 125200 + }, + { + "epoch": 47.16, + "learning_rate": 1.0568310124200227e-05, + "loss": 0.0958, + "step": 125300 + }, + { + "epoch": 47.2, + "learning_rate": 1.0560782837786978e-05, + "loss": 0.0961, + "step": 125400 + }, + { + "epoch": 47.23, + "learning_rate": 1.0553255551373732e-05, + "loss": 0.0959, + "step": 125500 + }, + { + "epoch": 47.27, + "learning_rate": 1.0545728264960483e-05, + "loss": 0.0954, + "step": 125600 + }, + { + "epoch": 47.31, + "learning_rate": 1.0538200978547235e-05, + "loss": 0.0992, + "step": 125700 + }, + { + "epoch": 47.35, + "learning_rate": 1.0530673692133986e-05, + "loss": 0.0967, + "step": 125800 + }, + { + "epoch": 47.38, + "learning_rate": 1.0523146405720738e-05, + "loss": 0.0957, + "step": 125900 + }, + { + "epoch": 47.42, + "learning_rate": 1.051561911930749e-05, + "loss": 0.0963, + "step": 126000 + }, + { + "epoch": 47.46, + "learning_rate": 1.0508091832894243e-05, + "loss": 0.0959, + "step": 126100 + }, + { + "epoch": 47.5, + "learning_rate": 1.0500564546480994e-05, + "loss": 0.0956, + "step": 126200 + }, + { + "epoch": 47.53, + "learning_rate": 1.0493037260067747e-05, + "loss": 0.0976, + "step": 126300 + }, + { + "epoch": 47.57, + "learning_rate": 1.0485509973654498e-05, + "loss": 0.0981, + "step": 126400 + }, + { + "epoch": 47.61, + "learning_rate": 1.047798268724125e-05, + "loss": 0.0982, + "step": 126500 + }, + { + "epoch": 47.65, + "learning_rate": 1.0470455400828001e-05, + "loss": 0.0959, + "step": 126600 + }, + { + "epoch": 47.69, + "learning_rate": 1.0462928114414755e-05, + "loss": 0.098, + "step": 126700 + }, + { + "epoch": 47.72, + "learning_rate": 1.0455400828001508e-05, + "loss": 0.0958, + "step": 126800 + }, + { + "epoch": 47.76, + "learning_rate": 1.0447873541588258e-05, + "loss": 0.0977, + "step": 126900 + }, + { + "epoch": 47.8, + "learning_rate": 1.0440346255175011e-05, + "loss": 0.0944, + "step": 127000 + }, + { + "epoch": 47.84, + "learning_rate": 1.0432818968761762e-05, + "loss": 0.0953, + "step": 127100 + }, + { + "epoch": 47.87, + "learning_rate": 1.0425291682348514e-05, + "loss": 0.0949, + "step": 127200 + }, + { + "epoch": 47.91, + "learning_rate": 1.0417764395935265e-05, + "loss": 0.0957, + "step": 127300 + }, + { + "epoch": 47.95, + "learning_rate": 1.041023710952202e-05, + "loss": 0.0976, + "step": 127400 + }, + { + "epoch": 47.99, + "learning_rate": 1.040270982310877e-05, + "loss": 0.0948, + "step": 127500 + }, + { + "epoch": 48.0, + "eval_loss": 0.0966743603348732, + "eval_runtime": 45.5618, + "eval_samples_per_second": 164.611, + "eval_steps_per_second": 10.294, + "step": 127536 + }, + { + "epoch": 48.02, + "learning_rate": 1.0395182536695523e-05, + "loss": 0.096, + "step": 127600 + }, + { + "epoch": 48.06, + "learning_rate": 1.0387655250282273e-05, + "loss": 0.0958, + "step": 127700 + }, + { + "epoch": 48.1, + "learning_rate": 1.0380127963869026e-05, + "loss": 0.0963, + "step": 127800 + }, + { + "epoch": 48.14, + "learning_rate": 1.0372600677455777e-05, + "loss": 0.095, + "step": 127900 + }, + { + "epoch": 48.17, + "learning_rate": 1.0365073391042531e-05, + "loss": 0.0973, + "step": 128000 + }, + { + "epoch": 48.21, + "learning_rate": 1.0357546104629282e-05, + "loss": 0.0958, + "step": 128100 + }, + { + "epoch": 48.25, + "learning_rate": 1.0350018818216034e-05, + "loss": 0.094, + "step": 128200 + }, + { + "epoch": 48.29, + "learning_rate": 1.0342491531802785e-05, + "loss": 0.0965, + "step": 128300 + }, + { + "epoch": 48.33, + "learning_rate": 1.0334964245389538e-05, + "loss": 0.0962, + "step": 128400 + }, + { + "epoch": 48.36, + "learning_rate": 1.0327436958976289e-05, + "loss": 0.0973, + "step": 128500 + }, + { + "epoch": 48.4, + "learning_rate": 1.0319909672563043e-05, + "loss": 0.0966, + "step": 128600 + }, + { + "epoch": 48.44, + "learning_rate": 1.0312382386149794e-05, + "loss": 0.0967, + "step": 128700 + }, + { + "epoch": 48.48, + "learning_rate": 1.0304855099736546e-05, + "loss": 0.0983, + "step": 128800 + }, + { + "epoch": 48.51, + "learning_rate": 1.0297327813323297e-05, + "loss": 0.0953, + "step": 128900 + }, + { + "epoch": 48.55, + "learning_rate": 1.028980052691005e-05, + "loss": 0.0951, + "step": 129000 + }, + { + "epoch": 48.59, + "learning_rate": 1.02822732404968e-05, + "loss": 0.0988, + "step": 129100 + }, + { + "epoch": 48.63, + "learning_rate": 1.0274745954083554e-05, + "loss": 0.095, + "step": 129200 + }, + { + "epoch": 48.66, + "learning_rate": 1.0267218667670307e-05, + "loss": 0.0956, + "step": 129300 + }, + { + "epoch": 48.7, + "learning_rate": 1.0259691381257058e-05, + "loss": 0.0962, + "step": 129400 + }, + { + "epoch": 48.74, + "learning_rate": 1.025216409484381e-05, + "loss": 0.0964, + "step": 129500 + }, + { + "epoch": 48.78, + "learning_rate": 1.0244636808430561e-05, + "loss": 0.0952, + "step": 129600 + }, + { + "epoch": 48.81, + "learning_rate": 1.0237109522017315e-05, + "loss": 0.0959, + "step": 129700 + }, + { + "epoch": 48.85, + "learning_rate": 1.0229582235604066e-05, + "loss": 0.0965, + "step": 129800 + }, + { + "epoch": 48.89, + "learning_rate": 1.0222054949190819e-05, + "loss": 0.0949, + "step": 129900 + }, + { + "epoch": 48.93, + "learning_rate": 1.021452766277757e-05, + "loss": 0.0966, + "step": 130000 + }, + { + "epoch": 48.96, + "learning_rate": 1.0207000376364322e-05, + "loss": 0.0974, + "step": 130100 + }, + { + "epoch": 49.0, + "eval_loss": 0.09543051570653915, + "eval_runtime": 45.608, + "eval_samples_per_second": 164.445, + "eval_steps_per_second": 10.283, + "step": 130193 + }, + { + "epoch": 49.0, + "learning_rate": 1.0199473089951073e-05, + "loss": 0.0959, + "step": 130200 + }, + { + "epoch": 49.04, + "learning_rate": 1.0191945803537825e-05, + "loss": 0.0962, + "step": 130300 + }, + { + "epoch": 49.08, + "learning_rate": 1.0184418517124576e-05, + "loss": 0.0954, + "step": 130400 + }, + { + "epoch": 49.12, + "learning_rate": 1.017689123071133e-05, + "loss": 0.0959, + "step": 130500 + }, + { + "epoch": 49.15, + "learning_rate": 1.0169363944298081e-05, + "loss": 0.0949, + "step": 130600 + }, + { + "epoch": 49.19, + "learning_rate": 1.0161836657884834e-05, + "loss": 0.0967, + "step": 130700 + }, + { + "epoch": 49.23, + "learning_rate": 1.0154309371471584e-05, + "loss": 0.0965, + "step": 130800 + }, + { + "epoch": 49.27, + "learning_rate": 1.0146782085058337e-05, + "loss": 0.0963, + "step": 130900 + }, + { + "epoch": 49.3, + "learning_rate": 1.0139254798645088e-05, + "loss": 0.0956, + "step": 131000 + }, + { + "epoch": 49.34, + "learning_rate": 1.0131727512231842e-05, + "loss": 0.0959, + "step": 131100 + }, + { + "epoch": 49.38, + "learning_rate": 1.0124200225818593e-05, + "loss": 0.0958, + "step": 131200 + }, + { + "epoch": 49.42, + "learning_rate": 1.0116672939405345e-05, + "loss": 0.0974, + "step": 131300 + }, + { + "epoch": 49.45, + "learning_rate": 1.0109145652992096e-05, + "loss": 0.0984, + "step": 131400 + }, + { + "epoch": 49.49, + "learning_rate": 1.0101618366578849e-05, + "loss": 0.0946, + "step": 131500 + }, + { + "epoch": 49.53, + "learning_rate": 1.00940910801656e-05, + "loss": 0.0967, + "step": 131600 + }, + { + "epoch": 49.57, + "learning_rate": 1.0086563793752354e-05, + "loss": 0.0968, + "step": 131700 + }, + { + "epoch": 49.6, + "learning_rate": 1.0079036507339106e-05, + "loss": 0.0961, + "step": 131800 + }, + { + "epoch": 49.64, + "learning_rate": 1.0071509220925857e-05, + "loss": 0.0946, + "step": 131900 + }, + { + "epoch": 49.68, + "learning_rate": 1.006398193451261e-05, + "loss": 0.0959, + "step": 132000 + }, + { + "epoch": 49.72, + "learning_rate": 1.005645464809936e-05, + "loss": 0.0959, + "step": 132100 + }, + { + "epoch": 49.76, + "learning_rate": 1.0048927361686115e-05, + "loss": 0.0974, + "step": 132200 + }, + { + "epoch": 49.79, + "learning_rate": 1.0041400075272865e-05, + "loss": 0.095, + "step": 132300 + }, + { + "epoch": 49.83, + "learning_rate": 1.0033872788859618e-05, + "loss": 0.0954, + "step": 132400 + }, + { + "epoch": 49.87, + "learning_rate": 1.0026345502446369e-05, + "loss": 0.0958, + "step": 132500 + }, + { + "epoch": 49.91, + "learning_rate": 1.0018818216033121e-05, + "loss": 0.0959, + "step": 132600 + }, + { + "epoch": 49.94, + "learning_rate": 1.0011290929619872e-05, + "loss": 0.095, + "step": 132700 + }, + { + "epoch": 49.98, + "learning_rate": 1.0003763643206625e-05, + "loss": 0.0958, + "step": 132800 + }, + { + "epoch": 50.0, + "eval_loss": 0.09539712220430374, + "eval_runtime": 45.0771, + "eval_samples_per_second": 166.382, + "eval_steps_per_second": 10.404, + "step": 132850 + }, + { + "epoch": 50.02, + "learning_rate": 9.996236356793377e-06, + "loss": 0.0956, + "step": 132900 + }, + { + "epoch": 50.06, + "learning_rate": 9.98870907038013e-06, + "loss": 0.0943, + "step": 133000 + }, + { + "epoch": 50.09, + "learning_rate": 9.98118178396688e-06, + "loss": 0.0958, + "step": 133100 + }, + { + "epoch": 50.13, + "learning_rate": 9.973654497553633e-06, + "loss": 0.0952, + "step": 133200 + }, + { + "epoch": 50.17, + "learning_rate": 9.966127211140385e-06, + "loss": 0.0969, + "step": 133300 + }, + { + "epoch": 50.21, + "learning_rate": 9.958599924727136e-06, + "loss": 0.0952, + "step": 133400 + }, + { + "epoch": 50.24, + "learning_rate": 9.951072638313889e-06, + "loss": 0.0959, + "step": 133500 + }, + { + "epoch": 50.28, + "learning_rate": 9.943545351900641e-06, + "loss": 0.0949, + "step": 133600 + }, + { + "epoch": 50.32, + "learning_rate": 9.936018065487392e-06, + "loss": 0.0964, + "step": 133700 + }, + { + "epoch": 50.36, + "learning_rate": 9.928490779074145e-06, + "loss": 0.0952, + "step": 133800 + }, + { + "epoch": 50.4, + "learning_rate": 9.920963492660897e-06, + "loss": 0.0944, + "step": 133900 + }, + { + "epoch": 50.43, + "learning_rate": 9.913436206247648e-06, + "loss": 0.0973, + "step": 134000 + }, + { + "epoch": 50.47, + "learning_rate": 9.9059089198344e-06, + "loss": 0.0949, + "step": 134100 + }, + { + "epoch": 50.51, + "learning_rate": 9.898381633421153e-06, + "loss": 0.096, + "step": 134200 + }, + { + "epoch": 50.55, + "learning_rate": 9.890854347007904e-06, + "loss": 0.0962, + "step": 134300 + }, + { + "epoch": 50.58, + "learning_rate": 9.883327060594656e-06, + "loss": 0.0972, + "step": 134400 + }, + { + "epoch": 50.62, + "learning_rate": 9.875799774181409e-06, + "loss": 0.0931, + "step": 134500 + }, + { + "epoch": 50.66, + "learning_rate": 9.86827248776816e-06, + "loss": 0.0961, + "step": 134600 + }, + { + "epoch": 50.7, + "learning_rate": 9.860745201354912e-06, + "loss": 0.0947, + "step": 134700 + }, + { + "epoch": 50.73, + "learning_rate": 9.853217914941665e-06, + "loss": 0.0968, + "step": 134800 + }, + { + "epoch": 50.77, + "learning_rate": 9.845690628528415e-06, + "loss": 0.095, + "step": 134900 + }, + { + "epoch": 50.81, + "learning_rate": 9.838163342115168e-06, + "loss": 0.0966, + "step": 135000 + }, + { + "epoch": 50.85, + "learning_rate": 9.83063605570192e-06, + "loss": 0.0966, + "step": 135100 + }, + { + "epoch": 50.88, + "learning_rate": 9.823108769288671e-06, + "loss": 0.0957, + "step": 135200 + }, + { + "epoch": 50.92, + "learning_rate": 9.815581482875424e-06, + "loss": 0.0948, + "step": 135300 + }, + { + "epoch": 50.96, + "learning_rate": 9.808054196462176e-06, + "loss": 0.0942, + "step": 135400 + }, + { + "epoch": 51.0, + "learning_rate": 9.800526910048929e-06, + "loss": 0.0948, + "step": 135500 + }, + { + "epoch": 51.0, + "eval_loss": 0.09547575563192368, + "eval_runtime": 45.2525, + "eval_samples_per_second": 165.737, + "eval_steps_per_second": 10.364, + "step": 135507 + }, + { + "epoch": 51.04, + "learning_rate": 9.792999623635681e-06, + "loss": 0.0973, + "step": 135600 + }, + { + "epoch": 51.07, + "learning_rate": 9.785472337222432e-06, + "loss": 0.0962, + "step": 135700 + }, + { + "epoch": 51.11, + "learning_rate": 9.777945050809185e-06, + "loss": 0.0957, + "step": 135800 + }, + { + "epoch": 51.15, + "learning_rate": 9.770417764395936e-06, + "loss": 0.0953, + "step": 135900 + }, + { + "epoch": 51.19, + "learning_rate": 9.762890477982688e-06, + "loss": 0.0948, + "step": 136000 + }, + { + "epoch": 51.22, + "learning_rate": 9.75536319156944e-06, + "loss": 0.0958, + "step": 136100 + }, + { + "epoch": 51.26, + "learning_rate": 9.747835905156191e-06, + "loss": 0.0956, + "step": 136200 + }, + { + "epoch": 51.3, + "learning_rate": 9.740308618742944e-06, + "loss": 0.0957, + "step": 136300 + }, + { + "epoch": 51.34, + "learning_rate": 9.732781332329696e-06, + "loss": 0.0953, + "step": 136400 + }, + { + "epoch": 51.37, + "learning_rate": 9.725254045916447e-06, + "loss": 0.0942, + "step": 136500 + }, + { + "epoch": 51.41, + "learning_rate": 9.7177267595032e-06, + "loss": 0.0952, + "step": 136600 + }, + { + "epoch": 51.45, + "learning_rate": 9.710199473089952e-06, + "loss": 0.0949, + "step": 136700 + }, + { + "epoch": 51.49, + "learning_rate": 9.702672186676703e-06, + "loss": 0.0961, + "step": 136800 + }, + { + "epoch": 51.52, + "learning_rate": 9.695144900263456e-06, + "loss": 0.0949, + "step": 136900 + }, + { + "epoch": 51.56, + "learning_rate": 9.687617613850208e-06, + "loss": 0.0955, + "step": 137000 + }, + { + "epoch": 51.6, + "learning_rate": 9.680090327436959e-06, + "loss": 0.0954, + "step": 137100 + }, + { + "epoch": 51.64, + "learning_rate": 9.672563041023711e-06, + "loss": 0.0958, + "step": 137200 + }, + { + "epoch": 51.67, + "learning_rate": 9.665035754610464e-06, + "loss": 0.0961, + "step": 137300 + }, + { + "epoch": 51.71, + "learning_rate": 9.657508468197215e-06, + "loss": 0.0971, + "step": 137400 + }, + { + "epoch": 51.75, + "learning_rate": 9.649981181783967e-06, + "loss": 0.0952, + "step": 137500 + }, + { + "epoch": 51.79, + "learning_rate": 9.64245389537072e-06, + "loss": 0.0961, + "step": 137600 + }, + { + "epoch": 51.83, + "learning_rate": 9.63492660895747e-06, + "loss": 0.0963, + "step": 137700 + }, + { + "epoch": 51.86, + "learning_rate": 9.627399322544223e-06, + "loss": 0.0952, + "step": 137800 + }, + { + "epoch": 51.9, + "learning_rate": 9.619872036130976e-06, + "loss": 0.0944, + "step": 137900 + }, + { + "epoch": 51.94, + "learning_rate": 9.612344749717728e-06, + "loss": 0.0952, + "step": 138000 + }, + { + "epoch": 51.98, + "learning_rate": 9.60481746330448e-06, + "loss": 0.095, + "step": 138100 + }, + { + "epoch": 52.0, + "eval_loss": 0.0952862873673439, + "eval_runtime": 45.261, + "eval_samples_per_second": 165.706, + "eval_steps_per_second": 10.362, + "step": 138164 + }, + { + "epoch": 52.01, + "learning_rate": 9.597290176891231e-06, + "loss": 0.0958, + "step": 138200 + }, + { + "epoch": 52.05, + "learning_rate": 9.589762890477984e-06, + "loss": 0.0955, + "step": 138300 + }, + { + "epoch": 52.09, + "learning_rate": 9.582235604064737e-06, + "loss": 0.0959, + "step": 138400 + }, + { + "epoch": 52.13, + "learning_rate": 9.574708317651487e-06, + "loss": 0.0971, + "step": 138500 + }, + { + "epoch": 52.16, + "learning_rate": 9.56718103123824e-06, + "loss": 0.0952, + "step": 138600 + }, + { + "epoch": 52.2, + "learning_rate": 9.559653744824992e-06, + "loss": 0.0955, + "step": 138700 + }, + { + "epoch": 52.24, + "learning_rate": 9.552126458411743e-06, + "loss": 0.0946, + "step": 138800 + }, + { + "epoch": 52.28, + "learning_rate": 9.544599171998496e-06, + "loss": 0.0965, + "step": 138900 + }, + { + "epoch": 52.31, + "learning_rate": 9.537071885585247e-06, + "loss": 0.0941, + "step": 139000 + }, + { + "epoch": 52.35, + "learning_rate": 9.529544599171999e-06, + "loss": 0.096, + "step": 139100 + }, + { + "epoch": 52.39, + "learning_rate": 9.522017312758752e-06, + "loss": 0.0928, + "step": 139200 + }, + { + "epoch": 52.43, + "learning_rate": 9.514490026345502e-06, + "loss": 0.096, + "step": 139300 + }, + { + "epoch": 52.47, + "learning_rate": 9.506962739932255e-06, + "loss": 0.0953, + "step": 139400 + }, + { + "epoch": 52.5, + "learning_rate": 9.499435453519007e-06, + "loss": 0.0943, + "step": 139500 + }, + { + "epoch": 52.54, + "learning_rate": 9.491908167105758e-06, + "loss": 0.0948, + "step": 139600 + }, + { + "epoch": 52.58, + "learning_rate": 9.48438088069251e-06, + "loss": 0.0953, + "step": 139700 + }, + { + "epoch": 52.62, + "learning_rate": 9.476853594279263e-06, + "loss": 0.0952, + "step": 139800 + }, + { + "epoch": 52.65, + "learning_rate": 9.469326307866014e-06, + "loss": 0.0934, + "step": 139900 + }, + { + "epoch": 52.69, + "learning_rate": 9.461799021452767e-06, + "loss": 0.0965, + "step": 140000 + }, + { + "epoch": 52.73, + "learning_rate": 9.454271735039519e-06, + "loss": 0.0962, + "step": 140100 + }, + { + "epoch": 52.77, + "learning_rate": 9.44674444862627e-06, + "loss": 0.0954, + "step": 140200 + }, + { + "epoch": 52.8, + "learning_rate": 9.439217162213024e-06, + "loss": 0.095, + "step": 140300 + }, + { + "epoch": 52.84, + "learning_rate": 9.431689875799775e-06, + "loss": 0.0941, + "step": 140400 + }, + { + "epoch": 52.88, + "learning_rate": 9.424162589386527e-06, + "loss": 0.0953, + "step": 140500 + }, + { + "epoch": 52.92, + "learning_rate": 9.41663530297328e-06, + "loss": 0.0963, + "step": 140600 + }, + { + "epoch": 52.95, + "learning_rate": 9.40910801656003e-06, + "loss": 0.0958, + "step": 140700 + }, + { + "epoch": 52.99, + "learning_rate": 9.401580730146783e-06, + "loss": 0.0939, + "step": 140800 + }, + { + "epoch": 53.0, + "eval_loss": 0.09453196078538895, + "eval_runtime": 45.2882, + "eval_samples_per_second": 165.606, + "eval_steps_per_second": 10.356, + "step": 140821 + }, + { + "epoch": 53.03, + "learning_rate": 9.394053443733536e-06, + "loss": 0.095, + "step": 140900 + }, + { + "epoch": 53.07, + "learning_rate": 9.386526157320287e-06, + "loss": 0.0955, + "step": 141000 + }, + { + "epoch": 53.11, + "learning_rate": 9.378998870907039e-06, + "loss": 0.0944, + "step": 141100 + }, + { + "epoch": 53.14, + "learning_rate": 9.371471584493792e-06, + "loss": 0.0953, + "step": 141200 + }, + { + "epoch": 53.18, + "learning_rate": 9.363944298080542e-06, + "loss": 0.0945, + "step": 141300 + }, + { + "epoch": 53.22, + "learning_rate": 9.356417011667295e-06, + "loss": 0.0959, + "step": 141400 + }, + { + "epoch": 53.26, + "learning_rate": 9.348889725254047e-06, + "loss": 0.0938, + "step": 141500 + }, + { + "epoch": 53.29, + "learning_rate": 9.341362438840798e-06, + "loss": 0.0956, + "step": 141600 + }, + { + "epoch": 53.33, + "learning_rate": 9.33383515242755e-06, + "loss": 0.0962, + "step": 141700 + }, + { + "epoch": 53.37, + "learning_rate": 9.326307866014303e-06, + "loss": 0.0959, + "step": 141800 + }, + { + "epoch": 53.41, + "learning_rate": 9.318780579601054e-06, + "loss": 0.0953, + "step": 141900 + }, + { + "epoch": 53.44, + "learning_rate": 9.311253293187807e-06, + "loss": 0.0952, + "step": 142000 + }, + { + "epoch": 53.48, + "learning_rate": 9.303726006774557e-06, + "loss": 0.0945, + "step": 142100 + }, + { + "epoch": 53.52, + "learning_rate": 9.29619872036131e-06, + "loss": 0.0958, + "step": 142200 + }, + { + "epoch": 53.56, + "learning_rate": 9.288671433948063e-06, + "loss": 0.0949, + "step": 142300 + }, + { + "epoch": 53.59, + "learning_rate": 9.281144147534813e-06, + "loss": 0.0943, + "step": 142400 + }, + { + "epoch": 53.63, + "learning_rate": 9.273616861121566e-06, + "loss": 0.0957, + "step": 142500 + }, + { + "epoch": 53.67, + "learning_rate": 9.266089574708318e-06, + "loss": 0.0949, + "step": 142600 + }, + { + "epoch": 53.71, + "learning_rate": 9.25856228829507e-06, + "loss": 0.0939, + "step": 142700 + }, + { + "epoch": 53.74, + "learning_rate": 9.251035001881823e-06, + "loss": 0.0942, + "step": 142800 + }, + { + "epoch": 53.78, + "learning_rate": 9.243507715468574e-06, + "loss": 0.0955, + "step": 142900 + }, + { + "epoch": 53.82, + "learning_rate": 9.235980429055327e-06, + "loss": 0.0935, + "step": 143000 + }, + { + "epoch": 53.86, + "learning_rate": 9.22845314264208e-06, + "loss": 0.0952, + "step": 143100 + }, + { + "epoch": 53.9, + "learning_rate": 9.22092585622883e-06, + "loss": 0.0946, + "step": 143200 + }, + { + "epoch": 53.93, + "learning_rate": 9.213398569815583e-06, + "loss": 0.0971, + "step": 143300 + }, + { + "epoch": 53.97, + "learning_rate": 9.205871283402335e-06, + "loss": 0.0961, + "step": 143400 + }, + { + "epoch": 54.0, + "eval_loss": 0.09483154118061066, + "eval_runtime": 45.4413, + "eval_samples_per_second": 165.048, + "eval_steps_per_second": 10.321, + "step": 143478 + }, + { + "epoch": 54.01, + "learning_rate": 9.198343996989086e-06, + "loss": 0.0943, + "step": 143500 + }, + { + "epoch": 54.05, + "learning_rate": 9.190816710575838e-06, + "loss": 0.097, + "step": 143600 + }, + { + "epoch": 54.08, + "learning_rate": 9.183289424162591e-06, + "loss": 0.0956, + "step": 143700 + }, + { + "epoch": 54.12, + "learning_rate": 9.175762137749342e-06, + "loss": 0.0945, + "step": 143800 + }, + { + "epoch": 54.16, + "learning_rate": 9.168234851336094e-06, + "loss": 0.0968, + "step": 143900 + }, + { + "epoch": 54.2, + "learning_rate": 9.160707564922847e-06, + "loss": 0.0955, + "step": 144000 + }, + { + "epoch": 54.23, + "learning_rate": 9.153180278509598e-06, + "loss": 0.0952, + "step": 144100 + }, + { + "epoch": 54.27, + "learning_rate": 9.14565299209635e-06, + "loss": 0.0941, + "step": 144200 + }, + { + "epoch": 54.31, + "learning_rate": 9.138125705683103e-06, + "loss": 0.0935, + "step": 144300 + }, + { + "epoch": 54.35, + "learning_rate": 9.130598419269853e-06, + "loss": 0.0949, + "step": 144400 + }, + { + "epoch": 54.38, + "learning_rate": 9.123071132856606e-06, + "loss": 0.0957, + "step": 144500 + }, + { + "epoch": 54.42, + "learning_rate": 9.115543846443358e-06, + "loss": 0.0958, + "step": 144600 + }, + { + "epoch": 54.46, + "learning_rate": 9.10801656003011e-06, + "loss": 0.0932, + "step": 144700 + }, + { + "epoch": 54.5, + "learning_rate": 9.100489273616862e-06, + "loss": 0.0945, + "step": 144800 + }, + { + "epoch": 54.54, + "learning_rate": 9.092961987203613e-06, + "loss": 0.0956, + "step": 144900 + }, + { + "epoch": 54.57, + "learning_rate": 9.085434700790365e-06, + "loss": 0.0949, + "step": 145000 + }, + { + "epoch": 54.61, + "learning_rate": 9.077907414377118e-06, + "loss": 0.0942, + "step": 145100 + }, + { + "epoch": 54.65, + "learning_rate": 9.070380127963868e-06, + "loss": 0.0949, + "step": 145200 + }, + { + "epoch": 54.69, + "learning_rate": 9.062852841550623e-06, + "loss": 0.0947, + "step": 145300 + }, + { + "epoch": 54.72, + "learning_rate": 9.055325555137373e-06, + "loss": 0.0952, + "step": 145400 + }, + { + "epoch": 54.76, + "learning_rate": 9.047798268724126e-06, + "loss": 0.096, + "step": 145500 + }, + { + "epoch": 54.8, + "learning_rate": 9.040270982310879e-06, + "loss": 0.0953, + "step": 145600 + }, + { + "epoch": 54.84, + "learning_rate": 9.03274369589763e-06, + "loss": 0.094, + "step": 145700 + }, + { + "epoch": 54.87, + "learning_rate": 9.025216409484382e-06, + "loss": 0.0936, + "step": 145800 + }, + { + "epoch": 54.91, + "learning_rate": 9.017689123071134e-06, + "loss": 0.0951, + "step": 145900 + }, + { + "epoch": 54.95, + "learning_rate": 9.010161836657885e-06, + "loss": 0.0959, + "step": 146000 + }, + { + "epoch": 54.99, + "learning_rate": 9.002634550244638e-06, + "loss": 0.0964, + "step": 146100 + }, + { + "epoch": 55.0, + "eval_loss": 0.09549073874950409, + "eval_runtime": 45.2457, + "eval_samples_per_second": 165.762, + "eval_steps_per_second": 10.366, + "step": 146135 + }, + { + "epoch": 55.02, + "learning_rate": 8.99510726383139e-06, + "loss": 0.0963, + "step": 146200 + }, + { + "epoch": 55.06, + "learning_rate": 8.987579977418141e-06, + "loss": 0.0962, + "step": 146300 + }, + { + "epoch": 55.1, + "learning_rate": 8.980052691004894e-06, + "loss": 0.0954, + "step": 146400 + }, + { + "epoch": 55.14, + "learning_rate": 8.972525404591646e-06, + "loss": 0.0934, + "step": 146500 + }, + { + "epoch": 55.18, + "learning_rate": 8.964998118178397e-06, + "loss": 0.0945, + "step": 146600 + }, + { + "epoch": 55.21, + "learning_rate": 8.95747083176515e-06, + "loss": 0.0936, + "step": 146700 + }, + { + "epoch": 55.25, + "learning_rate": 8.949943545351902e-06, + "loss": 0.095, + "step": 146800 + }, + { + "epoch": 55.29, + "learning_rate": 8.942416258938653e-06, + "loss": 0.094, + "step": 146900 + }, + { + "epoch": 55.33, + "learning_rate": 8.934888972525405e-06, + "loss": 0.0944, + "step": 147000 + }, + { + "epoch": 55.36, + "learning_rate": 8.927361686112158e-06, + "loss": 0.0947, + "step": 147100 + }, + { + "epoch": 55.4, + "learning_rate": 8.919834399698909e-06, + "loss": 0.0966, + "step": 147200 + }, + { + "epoch": 55.44, + "learning_rate": 8.912307113285661e-06, + "loss": 0.0933, + "step": 147300 + }, + { + "epoch": 55.48, + "learning_rate": 8.904779826872414e-06, + "loss": 0.0939, + "step": 147400 + }, + { + "epoch": 55.51, + "learning_rate": 8.897252540459164e-06, + "loss": 0.0953, + "step": 147500 + }, + { + "epoch": 55.55, + "learning_rate": 8.889725254045917e-06, + "loss": 0.0963, + "step": 147600 + }, + { + "epoch": 55.59, + "learning_rate": 8.88219796763267e-06, + "loss": 0.0947, + "step": 147700 + }, + { + "epoch": 55.63, + "learning_rate": 8.874670681219422e-06, + "loss": 0.0933, + "step": 147800 + }, + { + "epoch": 55.66, + "learning_rate": 8.867143394806173e-06, + "loss": 0.0951, + "step": 147900 + }, + { + "epoch": 55.7, + "learning_rate": 8.859616108392925e-06, + "loss": 0.0955, + "step": 148000 + }, + { + "epoch": 55.74, + "learning_rate": 8.852088821979678e-06, + "loss": 0.0943, + "step": 148100 + }, + { + "epoch": 55.78, + "learning_rate": 8.844561535566429e-06, + "loss": 0.0942, + "step": 148200 + }, + { + "epoch": 55.81, + "learning_rate": 8.837034249153181e-06, + "loss": 0.0962, + "step": 148300 + }, + { + "epoch": 55.85, + "learning_rate": 8.829506962739934e-06, + "loss": 0.0937, + "step": 148400 + }, + { + "epoch": 55.89, + "learning_rate": 8.821979676326684e-06, + "loss": 0.0944, + "step": 148500 + }, + { + "epoch": 55.93, + "learning_rate": 8.814452389913437e-06, + "loss": 0.0937, + "step": 148600 + }, + { + "epoch": 55.97, + "learning_rate": 8.80692510350019e-06, + "loss": 0.0934, + "step": 148700 + }, + { + "epoch": 56.0, + "eval_loss": 0.0948183611035347, + "eval_runtime": 44.9888, + "eval_samples_per_second": 166.708, + "eval_steps_per_second": 10.425, + "step": 148792 + }, + { + "epoch": 56.0, + "learning_rate": 8.79939781708694e-06, + "loss": 0.0939, + "step": 148800 + }, + { + "epoch": 56.04, + "learning_rate": 8.791870530673693e-06, + "loss": 0.0966, + "step": 148900 + }, + { + "epoch": 56.08, + "learning_rate": 8.784343244260445e-06, + "loss": 0.0951, + "step": 149000 + }, + { + "epoch": 56.12, + "learning_rate": 8.776815957847196e-06, + "loss": 0.0955, + "step": 149100 + }, + { + "epoch": 56.15, + "learning_rate": 8.769288671433949e-06, + "loss": 0.0959, + "step": 149200 + }, + { + "epoch": 56.19, + "learning_rate": 8.761761385020701e-06, + "loss": 0.0949, + "step": 149300 + }, + { + "epoch": 56.23, + "learning_rate": 8.754234098607452e-06, + "loss": 0.0938, + "step": 149400 + }, + { + "epoch": 56.27, + "learning_rate": 8.746706812194205e-06, + "loss": 0.0941, + "step": 149500 + }, + { + "epoch": 56.3, + "learning_rate": 8.739179525780957e-06, + "loss": 0.0939, + "step": 149600 + }, + { + "epoch": 56.34, + "learning_rate": 8.731652239367708e-06, + "loss": 0.0944, + "step": 149700 + }, + { + "epoch": 56.38, + "learning_rate": 8.72412495295446e-06, + "loss": 0.0949, + "step": 149800 + }, + { + "epoch": 56.42, + "learning_rate": 8.716597666541213e-06, + "loss": 0.0952, + "step": 149900 + }, + { + "epoch": 56.45, + "learning_rate": 8.709070380127964e-06, + "loss": 0.0967, + "step": 150000 + }, + { + "epoch": 56.49, + "learning_rate": 8.701543093714716e-06, + "loss": 0.0948, + "step": 150100 + }, + { + "epoch": 56.53, + "learning_rate": 8.694015807301469e-06, + "loss": 0.0942, + "step": 150200 + }, + { + "epoch": 56.57, + "learning_rate": 8.686488520888221e-06, + "loss": 0.0948, + "step": 150300 + }, + { + "epoch": 56.61, + "learning_rate": 8.678961234474974e-06, + "loss": 0.0954, + "step": 150400 + }, + { + "epoch": 56.64, + "learning_rate": 8.671433948061725e-06, + "loss": 0.0951, + "step": 150500 + }, + { + "epoch": 56.68, + "learning_rate": 8.663906661648477e-06, + "loss": 0.094, + "step": 150600 + }, + { + "epoch": 56.72, + "learning_rate": 8.65637937523523e-06, + "loss": 0.094, + "step": 150700 + }, + { + "epoch": 56.76, + "learning_rate": 8.64885208882198e-06, + "loss": 0.0947, + "step": 150800 + }, + { + "epoch": 56.79, + "learning_rate": 8.641324802408733e-06, + "loss": 0.0954, + "step": 150900 + }, + { + "epoch": 56.83, + "learning_rate": 8.633797515995484e-06, + "loss": 0.0941, + "step": 151000 + }, + { + "epoch": 56.87, + "learning_rate": 8.626270229582236e-06, + "loss": 0.0948, + "step": 151100 + }, + { + "epoch": 56.91, + "learning_rate": 8.618742943168989e-06, + "loss": 0.0934, + "step": 151200 + }, + { + "epoch": 56.94, + "learning_rate": 8.61121565675574e-06, + "loss": 0.0927, + "step": 151300 + }, + { + "epoch": 56.98, + "learning_rate": 8.603688370342492e-06, + "loss": 0.0965, + "step": 151400 + }, + { + "epoch": 57.0, + "eval_loss": 0.09426940232515335, + "eval_runtime": 45.2417, + "eval_samples_per_second": 165.776, + "eval_steps_per_second": 10.367, + "step": 151449 + }, + { + "epoch": 57.02, + "learning_rate": 8.596161083929245e-06, + "loss": 0.0941, + "step": 151500 + }, + { + "epoch": 57.06, + "learning_rate": 8.588633797515995e-06, + "loss": 0.0947, + "step": 151600 + }, + { + "epoch": 57.09, + "learning_rate": 8.581106511102748e-06, + "loss": 0.0932, + "step": 151700 + }, + { + "epoch": 57.13, + "learning_rate": 8.5735792246895e-06, + "loss": 0.0947, + "step": 151800 + }, + { + "epoch": 57.17, + "learning_rate": 8.566051938276251e-06, + "loss": 0.0954, + "step": 151900 + }, + { + "epoch": 57.21, + "learning_rate": 8.558524651863004e-06, + "loss": 0.0956, + "step": 152000 + }, + { + "epoch": 57.25, + "learning_rate": 8.550997365449756e-06, + "loss": 0.0939, + "step": 152100 + }, + { + "epoch": 57.28, + "learning_rate": 8.543470079036507e-06, + "loss": 0.0943, + "step": 152200 + }, + { + "epoch": 57.32, + "learning_rate": 8.53594279262326e-06, + "loss": 0.0926, + "step": 152300 + }, + { + "epoch": 57.36, + "learning_rate": 8.528415506210012e-06, + "loss": 0.0936, + "step": 152400 + }, + { + "epoch": 57.4, + "learning_rate": 8.520888219796763e-06, + "loss": 0.0942, + "step": 152500 + }, + { + "epoch": 57.43, + "learning_rate": 8.513360933383515e-06, + "loss": 0.0955, + "step": 152600 + }, + { + "epoch": 57.47, + "learning_rate": 8.505833646970268e-06, + "loss": 0.0955, + "step": 152700 + }, + { + "epoch": 57.51, + "learning_rate": 8.49830636055702e-06, + "loss": 0.0946, + "step": 152800 + }, + { + "epoch": 57.55, + "learning_rate": 8.490779074143773e-06, + "loss": 0.0964, + "step": 152900 + }, + { + "epoch": 57.58, + "learning_rate": 8.483251787730524e-06, + "loss": 0.094, + "step": 153000 + }, + { + "epoch": 57.62, + "learning_rate": 8.475724501317276e-06, + "loss": 0.0949, + "step": 153100 + }, + { + "epoch": 57.66, + "learning_rate": 8.468197214904029e-06, + "loss": 0.0947, + "step": 153200 + }, + { + "epoch": 57.7, + "learning_rate": 8.46066992849078e-06, + "loss": 0.0952, + "step": 153300 + }, + { + "epoch": 57.73, + "learning_rate": 8.453142642077532e-06, + "loss": 0.0939, + "step": 153400 + }, + { + "epoch": 57.77, + "learning_rate": 8.445615355664285e-06, + "loss": 0.0947, + "step": 153500 + }, + { + "epoch": 57.81, + "learning_rate": 8.438088069251036e-06, + "loss": 0.094, + "step": 153600 + }, + { + "epoch": 57.85, + "learning_rate": 8.430560782837788e-06, + "loss": 0.0922, + "step": 153700 + }, + { + "epoch": 57.88, + "learning_rate": 8.423033496424539e-06, + "loss": 0.0947, + "step": 153800 + }, + { + "epoch": 57.92, + "learning_rate": 8.415506210011291e-06, + "loss": 0.0948, + "step": 153900 + }, + { + "epoch": 57.96, + "learning_rate": 8.407978923598044e-06, + "loss": 0.0921, + "step": 154000 + }, + { + "epoch": 58.0, + "learning_rate": 8.400451637184795e-06, + "loss": 0.0966, + "step": 154100 + }, + { + "epoch": 58.0, + "eval_loss": 0.094062440097332, + "eval_runtime": 45.4411, + "eval_samples_per_second": 165.049, + "eval_steps_per_second": 10.321, + "step": 154106 + }, + { + "epoch": 58.04, + "learning_rate": 8.392924350771547e-06, + "loss": 0.0938, + "step": 154200 + }, + { + "epoch": 58.07, + "learning_rate": 8.3853970643583e-06, + "loss": 0.0938, + "step": 154300 + }, + { + "epoch": 58.11, + "learning_rate": 8.37786977794505e-06, + "loss": 0.0936, + "step": 154400 + }, + { + "epoch": 58.15, + "learning_rate": 8.370342491531803e-06, + "loss": 0.0942, + "step": 154500 + }, + { + "epoch": 58.19, + "learning_rate": 8.362815205118556e-06, + "loss": 0.0961, + "step": 154600 + }, + { + "epoch": 58.22, + "learning_rate": 8.355287918705306e-06, + "loss": 0.0945, + "step": 154700 + }, + { + "epoch": 58.26, + "learning_rate": 8.347760632292059e-06, + "loss": 0.0954, + "step": 154800 + }, + { + "epoch": 58.3, + "learning_rate": 8.340233345878811e-06, + "loss": 0.0942, + "step": 154900 + }, + { + "epoch": 58.34, + "learning_rate": 8.332706059465562e-06, + "loss": 0.095, + "step": 155000 + }, + { + "epoch": 58.37, + "learning_rate": 8.325178773052315e-06, + "loss": 0.0945, + "step": 155100 + }, + { + "epoch": 58.41, + "learning_rate": 8.317651486639067e-06, + "loss": 0.0937, + "step": 155200 + }, + { + "epoch": 58.45, + "learning_rate": 8.31012420022582e-06, + "loss": 0.0956, + "step": 155300 + }, + { + "epoch": 58.49, + "learning_rate": 8.302596913812572e-06, + "loss": 0.0926, + "step": 155400 + }, + { + "epoch": 58.52, + "learning_rate": 8.295069627399323e-06, + "loss": 0.0936, + "step": 155500 + }, + { + "epoch": 58.56, + "learning_rate": 8.287542340986076e-06, + "loss": 0.095, + "step": 155600 + }, + { + "epoch": 58.6, + "learning_rate": 8.280015054572828e-06, + "loss": 0.0933, + "step": 155700 + }, + { + "epoch": 58.64, + "learning_rate": 8.272487768159579e-06, + "loss": 0.0941, + "step": 155800 + }, + { + "epoch": 58.68, + "learning_rate": 8.264960481746331e-06, + "loss": 0.0937, + "step": 155900 + }, + { + "epoch": 58.71, + "learning_rate": 8.257433195333084e-06, + "loss": 0.0948, + "step": 156000 + }, + { + "epoch": 58.75, + "learning_rate": 8.249905908919835e-06, + "loss": 0.0947, + "step": 156100 + }, + { + "epoch": 58.79, + "learning_rate": 8.242378622506587e-06, + "loss": 0.0919, + "step": 156200 + }, + { + "epoch": 58.83, + "learning_rate": 8.23485133609334e-06, + "loss": 0.0956, + "step": 156300 + }, + { + "epoch": 58.86, + "learning_rate": 8.22732404968009e-06, + "loss": 0.0946, + "step": 156400 + }, + { + "epoch": 58.9, + "learning_rate": 8.219796763266843e-06, + "loss": 0.0934, + "step": 156500 + }, + { + "epoch": 58.94, + "learning_rate": 8.212269476853596e-06, + "loss": 0.0953, + "step": 156600 + }, + { + "epoch": 58.98, + "learning_rate": 8.204742190440347e-06, + "loss": 0.0926, + "step": 156700 + }, + { + "epoch": 59.0, + "eval_loss": 0.0938277319073677, + "eval_runtime": 45.0217, + "eval_samples_per_second": 166.586, + "eval_steps_per_second": 10.417, + "step": 156763 + }, + { + "epoch": 59.01, + "learning_rate": 8.197214904027099e-06, + "loss": 0.0939, + "step": 156800 + }, + { + "epoch": 59.05, + "learning_rate": 8.18968761761385e-06, + "loss": 0.0931, + "step": 156900 + }, + { + "epoch": 59.09, + "learning_rate": 8.182160331200602e-06, + "loss": 0.0945, + "step": 157000 + }, + { + "epoch": 59.13, + "learning_rate": 8.174633044787355e-06, + "loss": 0.0941, + "step": 157100 + }, + { + "epoch": 59.16, + "learning_rate": 8.167105758374106e-06, + "loss": 0.0937, + "step": 157200 + }, + { + "epoch": 59.2, + "learning_rate": 8.159578471960858e-06, + "loss": 0.0945, + "step": 157300 + }, + { + "epoch": 59.24, + "learning_rate": 8.15205118554761e-06, + "loss": 0.0949, + "step": 157400 + }, + { + "epoch": 59.28, + "learning_rate": 8.144523899134362e-06, + "loss": 0.0926, + "step": 157500 + }, + { + "epoch": 59.32, + "learning_rate": 8.136996612721114e-06, + "loss": 0.0943, + "step": 157600 + }, + { + "epoch": 59.35, + "learning_rate": 8.129469326307867e-06, + "loss": 0.0929, + "step": 157700 + }, + { + "epoch": 59.39, + "learning_rate": 8.121942039894619e-06, + "loss": 0.0964, + "step": 157800 + }, + { + "epoch": 59.43, + "learning_rate": 8.114414753481372e-06, + "loss": 0.0944, + "step": 157900 + }, + { + "epoch": 59.47, + "learning_rate": 8.106887467068122e-06, + "loss": 0.0943, + "step": 158000 + }, + { + "epoch": 59.5, + "learning_rate": 8.099360180654875e-06, + "loss": 0.0962, + "step": 158100 + }, + { + "epoch": 59.54, + "learning_rate": 8.091832894241627e-06, + "loss": 0.0948, + "step": 158200 + }, + { + "epoch": 59.58, + "learning_rate": 8.084305607828378e-06, + "loss": 0.095, + "step": 158300 + }, + { + "epoch": 59.62, + "learning_rate": 8.07677832141513e-06, + "loss": 0.0941, + "step": 158400 + }, + { + "epoch": 59.65, + "learning_rate": 8.069251035001883e-06, + "loss": 0.0949, + "step": 158500 + }, + { + "epoch": 59.69, + "learning_rate": 8.061723748588634e-06, + "loss": 0.0945, + "step": 158600 + }, + { + "epoch": 59.73, + "learning_rate": 8.054196462175387e-06, + "loss": 0.0946, + "step": 158700 + }, + { + "epoch": 59.77, + "learning_rate": 8.046669175762139e-06, + "loss": 0.0948, + "step": 158800 + }, + { + "epoch": 59.8, + "learning_rate": 8.03914188934889e-06, + "loss": 0.0946, + "step": 158900 + }, + { + "epoch": 59.84, + "learning_rate": 8.031614602935642e-06, + "loss": 0.0945, + "step": 159000 + }, + { + "epoch": 59.88, + "learning_rate": 8.024087316522395e-06, + "loss": 0.0944, + "step": 159100 + }, + { + "epoch": 59.92, + "learning_rate": 8.016560030109146e-06, + "loss": 0.0942, + "step": 159200 + }, + { + "epoch": 59.95, + "learning_rate": 8.009032743695898e-06, + "loss": 0.0937, + "step": 159300 + }, + { + "epoch": 59.99, + "learning_rate": 8.00150545728265e-06, + "loss": 0.0928, + "step": 159400 + }, + { + "epoch": 60.0, + "eval_loss": 0.09416601806879044, + "eval_runtime": 45.0453, + "eval_samples_per_second": 166.499, + "eval_steps_per_second": 10.412, + "step": 159420 + }, + { + "epoch": 60.03, + "learning_rate": 7.993978170869402e-06, + "loss": 0.0923, + "step": 159500 + }, + { + "epoch": 60.07, + "learning_rate": 7.986450884456154e-06, + "loss": 0.0956, + "step": 159600 + }, + { + "epoch": 60.11, + "learning_rate": 7.978923598042907e-06, + "loss": 0.0954, + "step": 159700 + }, + { + "epoch": 60.14, + "learning_rate": 7.971396311629657e-06, + "loss": 0.0946, + "step": 159800 + }, + { + "epoch": 60.18, + "learning_rate": 7.96386902521641e-06, + "loss": 0.0952, + "step": 159900 + }, + { + "epoch": 60.22, + "learning_rate": 7.95634173880316e-06, + "loss": 0.0932, + "step": 160000 + }, + { + "epoch": 60.26, + "learning_rate": 7.948814452389913e-06, + "loss": 0.0942, + "step": 160100 + }, + { + "epoch": 60.29, + "learning_rate": 7.941287165976666e-06, + "loss": 0.0923, + "step": 160200 + }, + { + "epoch": 60.33, + "learning_rate": 7.933759879563418e-06, + "loss": 0.0943, + "step": 160300 + }, + { + "epoch": 60.37, + "learning_rate": 7.926232593150171e-06, + "loss": 0.0926, + "step": 160400 + }, + { + "epoch": 60.41, + "learning_rate": 7.918705306736922e-06, + "loss": 0.0944, + "step": 160500 + }, + { + "epoch": 60.44, + "learning_rate": 7.911178020323674e-06, + "loss": 0.0951, + "step": 160600 + }, + { + "epoch": 60.48, + "learning_rate": 7.903650733910427e-06, + "loss": 0.0925, + "step": 160700 + }, + { + "epoch": 60.52, + "learning_rate": 7.896123447497178e-06, + "loss": 0.0938, + "step": 160800 + }, + { + "epoch": 60.56, + "learning_rate": 7.88859616108393e-06, + "loss": 0.0949, + "step": 160900 + }, + { + "epoch": 60.59, + "learning_rate": 7.881068874670683e-06, + "loss": 0.0924, + "step": 161000 + }, + { + "epoch": 60.63, + "learning_rate": 7.873541588257433e-06, + "loss": 0.0945, + "step": 161100 + }, + { + "epoch": 60.67, + "learning_rate": 7.866014301844186e-06, + "loss": 0.0935, + "step": 161200 + }, + { + "epoch": 60.71, + "learning_rate": 7.858487015430938e-06, + "loss": 0.095, + "step": 161300 + }, + { + "epoch": 60.75, + "learning_rate": 7.85095972901769e-06, + "loss": 0.0938, + "step": 161400 + }, + { + "epoch": 60.78, + "learning_rate": 7.843432442604442e-06, + "loss": 0.0937, + "step": 161500 + }, + { + "epoch": 60.82, + "learning_rate": 7.835905156191194e-06, + "loss": 0.0942, + "step": 161600 + }, + { + "epoch": 60.86, + "learning_rate": 7.828377869777945e-06, + "loss": 0.0955, + "step": 161700 + }, + { + "epoch": 60.9, + "learning_rate": 7.820850583364698e-06, + "loss": 0.0932, + "step": 161800 + }, + { + "epoch": 60.93, + "learning_rate": 7.81332329695145e-06, + "loss": 0.094, + "step": 161900 + }, + { + "epoch": 60.97, + "learning_rate": 7.805796010538201e-06, + "loss": 0.093, + "step": 162000 + }, + { + "epoch": 61.0, + "eval_loss": 0.09355577826499939, + "eval_runtime": 45.0615, + "eval_samples_per_second": 166.439, + "eval_steps_per_second": 10.408, + "step": 162077 + }, + { + "epoch": 61.01, + "learning_rate": 7.798268724124953e-06, + "loss": 0.0932, + "step": 162100 + }, + { + "epoch": 61.05, + "learning_rate": 7.790741437711706e-06, + "loss": 0.0942, + "step": 162200 + }, + { + "epoch": 61.08, + "learning_rate": 7.783214151298457e-06, + "loss": 0.0933, + "step": 162300 + }, + { + "epoch": 61.12, + "learning_rate": 7.77568686488521e-06, + "loss": 0.0942, + "step": 162400 + }, + { + "epoch": 61.16, + "learning_rate": 7.768159578471962e-06, + "loss": 0.0938, + "step": 162500 + }, + { + "epoch": 61.2, + "learning_rate": 7.760632292058713e-06, + "loss": 0.0948, + "step": 162600 + }, + { + "epoch": 61.23, + "learning_rate": 7.753105005645465e-06, + "loss": 0.0946, + "step": 162700 + }, + { + "epoch": 61.27, + "learning_rate": 7.745577719232218e-06, + "loss": 0.0926, + "step": 162800 + }, + { + "epoch": 61.31, + "learning_rate": 7.73805043281897e-06, + "loss": 0.0945, + "step": 162900 + }, + { + "epoch": 61.35, + "learning_rate": 7.730523146405721e-06, + "loss": 0.0923, + "step": 163000 + }, + { + "epoch": 61.39, + "learning_rate": 7.722995859992473e-06, + "loss": 0.0935, + "step": 163100 + }, + { + "epoch": 61.42, + "learning_rate": 7.715468573579226e-06, + "loss": 0.0938, + "step": 163200 + }, + { + "epoch": 61.46, + "learning_rate": 7.707941287165977e-06, + "loss": 0.0938, + "step": 163300 + }, + { + "epoch": 61.5, + "learning_rate": 7.70041400075273e-06, + "loss": 0.0929, + "step": 163400 + }, + { + "epoch": 61.54, + "learning_rate": 7.692886714339482e-06, + "loss": 0.0937, + "step": 163500 + }, + { + "epoch": 61.57, + "learning_rate": 7.685359427926233e-06, + "loss": 0.0921, + "step": 163600 + }, + { + "epoch": 61.61, + "learning_rate": 7.677832141512985e-06, + "loss": 0.0933, + "step": 163700 + }, + { + "epoch": 61.65, + "learning_rate": 7.670304855099738e-06, + "loss": 0.0929, + "step": 163800 + }, + { + "epoch": 61.69, + "learning_rate": 7.662777568686489e-06, + "loss": 0.0931, + "step": 163900 + }, + { + "epoch": 61.72, + "learning_rate": 7.655250282273241e-06, + "loss": 0.0947, + "step": 164000 + }, + { + "epoch": 61.76, + "learning_rate": 7.647722995859994e-06, + "loss": 0.0944, + "step": 164100 + }, + { + "epoch": 61.8, + "learning_rate": 7.640195709446744e-06, + "loss": 0.0929, + "step": 164200 + }, + { + "epoch": 61.84, + "learning_rate": 7.632668423033497e-06, + "loss": 0.0945, + "step": 164300 + }, + { + "epoch": 61.87, + "learning_rate": 7.6251411366202485e-06, + "loss": 0.0952, + "step": 164400 + }, + { + "epoch": 61.91, + "learning_rate": 7.617613850207001e-06, + "loss": 0.0939, + "step": 164500 + }, + { + "epoch": 61.95, + "learning_rate": 7.610086563793753e-06, + "loss": 0.0949, + "step": 164600 + }, + { + "epoch": 61.99, + "learning_rate": 7.602559277380504e-06, + "loss": 0.0939, + "step": 164700 + }, + { + "epoch": 62.0, + "eval_loss": 0.09392710030078888, + "eval_runtime": 45.1193, + "eval_samples_per_second": 166.226, + "eval_steps_per_second": 10.395, + "step": 164734 + }, + { + "epoch": 62.02, + "learning_rate": 7.595031990967257e-06, + "loss": 0.0937, + "step": 164800 + }, + { + "epoch": 62.06, + "learning_rate": 7.5875047045540086e-06, + "loss": 0.0934, + "step": 164900 + }, + { + "epoch": 62.1, + "learning_rate": 7.57997741814076e-06, + "loss": 0.094, + "step": 165000 + }, + { + "epoch": 62.14, + "learning_rate": 7.572450131727512e-06, + "loss": 0.0947, + "step": 165100 + }, + { + "epoch": 62.18, + "learning_rate": 7.564922845314265e-06, + "loss": 0.0926, + "step": 165200 + }, + { + "epoch": 62.21, + "learning_rate": 7.557395558901017e-06, + "loss": 0.0947, + "step": 165300 + }, + { + "epoch": 62.25, + "learning_rate": 7.5498682724877694e-06, + "loss": 0.0937, + "step": 165400 + }, + { + "epoch": 62.29, + "learning_rate": 7.542340986074521e-06, + "loss": 0.0914, + "step": 165500 + }, + { + "epoch": 62.33, + "learning_rate": 7.534813699661273e-06, + "loss": 0.0953, + "step": 165600 + }, + { + "epoch": 62.36, + "learning_rate": 7.527286413248025e-06, + "loss": 0.0952, + "step": 165700 + }, + { + "epoch": 62.4, + "learning_rate": 7.519759126834777e-06, + "loss": 0.0935, + "step": 165800 + }, + { + "epoch": 62.44, + "learning_rate": 7.512231840421529e-06, + "loss": 0.0947, + "step": 165900 + }, + { + "epoch": 62.48, + "learning_rate": 7.504704554008281e-06, + "loss": 0.0935, + "step": 166000 + }, + { + "epoch": 62.51, + "learning_rate": 7.497177267595033e-06, + "loss": 0.0957, + "step": 166100 + }, + { + "epoch": 62.55, + "learning_rate": 7.4896499811817845e-06, + "loss": 0.0926, + "step": 166200 + }, + { + "epoch": 62.59, + "learning_rate": 7.482122694768537e-06, + "loss": 0.0943, + "step": 166300 + }, + { + "epoch": 62.63, + "learning_rate": 7.474595408355289e-06, + "loss": 0.0938, + "step": 166400 + }, + { + "epoch": 62.66, + "learning_rate": 7.46706812194204e-06, + "loss": 0.0943, + "step": 166500 + }, + { + "epoch": 62.7, + "learning_rate": 7.459540835528792e-06, + "loss": 0.0924, + "step": 166600 + }, + { + "epoch": 62.74, + "learning_rate": 7.4520135491155445e-06, + "loss": 0.0929, + "step": 166700 + }, + { + "epoch": 62.78, + "learning_rate": 7.444486262702296e-06, + "loss": 0.0936, + "step": 166800 + }, + { + "epoch": 62.82, + "learning_rate": 7.436958976289048e-06, + "loss": 0.0931, + "step": 166900 + }, + { + "epoch": 62.85, + "learning_rate": 7.4294316898758e-06, + "loss": 0.0943, + "step": 167000 + }, + { + "epoch": 62.89, + "learning_rate": 7.421904403462552e-06, + "loss": 0.094, + "step": 167100 + }, + { + "epoch": 62.93, + "learning_rate": 7.414377117049304e-06, + "loss": 0.0931, + "step": 167200 + }, + { + "epoch": 62.97, + "learning_rate": 7.406849830636056e-06, + "loss": 0.0936, + "step": 167300 + }, + { + "epoch": 63.0, + "eval_loss": 0.09357059001922607, + "eval_runtime": 45.0624, + "eval_samples_per_second": 166.436, + "eval_steps_per_second": 10.408, + "step": 167391 + }, + { + "epoch": 63.0, + "learning_rate": 7.399322544222808e-06, + "loss": 0.0936, + "step": 167400 + }, + { + "epoch": 63.04, + "learning_rate": 7.3917952578095595e-06, + "loss": 0.094, + "step": 167500 + }, + { + "epoch": 63.08, + "learning_rate": 7.384267971396312e-06, + "loss": 0.0946, + "step": 167600 + }, + { + "epoch": 63.12, + "learning_rate": 7.3767406849830646e-06, + "loss": 0.0941, + "step": 167700 + }, + { + "epoch": 63.15, + "learning_rate": 7.369213398569817e-06, + "loss": 0.0943, + "step": 167800 + }, + { + "epoch": 63.19, + "learning_rate": 7.361686112156569e-06, + "loss": 0.0949, + "step": 167900 + }, + { + "epoch": 63.23, + "learning_rate": 7.35415882574332e-06, + "loss": 0.0945, + "step": 168000 + }, + { + "epoch": 63.27, + "learning_rate": 7.346631539330072e-06, + "loss": 0.0924, + "step": 168100 + }, + { + "epoch": 63.3, + "learning_rate": 7.3391042529168246e-06, + "loss": 0.094, + "step": 168200 + }, + { + "epoch": 63.34, + "learning_rate": 7.331576966503576e-06, + "loss": 0.0955, + "step": 168300 + }, + { + "epoch": 63.38, + "learning_rate": 7.324049680090328e-06, + "loss": 0.0937, + "step": 168400 + }, + { + "epoch": 63.42, + "learning_rate": 7.31652239367708e-06, + "loss": 0.0924, + "step": 168500 + }, + { + "epoch": 63.46, + "learning_rate": 7.308995107263832e-06, + "loss": 0.0943, + "step": 168600 + }, + { + "epoch": 63.49, + "learning_rate": 7.301467820850584e-06, + "loss": 0.094, + "step": 168700 + }, + { + "epoch": 63.53, + "learning_rate": 7.293940534437336e-06, + "loss": 0.0916, + "step": 168800 + }, + { + "epoch": 63.57, + "learning_rate": 7.286413248024088e-06, + "loss": 0.092, + "step": 168900 + }, + { + "epoch": 63.61, + "learning_rate": 7.27888596161084e-06, + "loss": 0.0951, + "step": 169000 + }, + { + "epoch": 63.64, + "learning_rate": 7.271358675197592e-06, + "loss": 0.0928, + "step": 169100 + }, + { + "epoch": 63.68, + "learning_rate": 7.263831388784344e-06, + "loss": 0.0938, + "step": 169200 + }, + { + "epoch": 63.72, + "learning_rate": 7.2563041023710954e-06, + "loss": 0.0936, + "step": 169300 + }, + { + "epoch": 63.76, + "learning_rate": 7.248776815957848e-06, + "loss": 0.0928, + "step": 169400 + }, + { + "epoch": 63.79, + "learning_rate": 7.2412495295446e-06, + "loss": 0.0944, + "step": 169500 + }, + { + "epoch": 63.83, + "learning_rate": 7.233722243131351e-06, + "loss": 0.0925, + "step": 169600 + }, + { + "epoch": 63.87, + "learning_rate": 7.226194956718103e-06, + "loss": 0.0932, + "step": 169700 + }, + { + "epoch": 63.91, + "learning_rate": 7.2186676703048555e-06, + "loss": 0.0934, + "step": 169800 + }, + { + "epoch": 63.94, + "learning_rate": 7.211140383891607e-06, + "loss": 0.0927, + "step": 169900 + }, + { + "epoch": 63.98, + "learning_rate": 7.203613097478359e-06, + "loss": 0.093, + "step": 170000 + }, + { + "epoch": 64.0, + "eval_loss": 0.09292689710855484, + "eval_runtime": 45.1577, + "eval_samples_per_second": 166.085, + "eval_steps_per_second": 10.386, + "step": 170048 + }, + { + "epoch": 64.02, + "learning_rate": 7.196085811065112e-06, + "loss": 0.0933, + "step": 170100 + }, + { + "epoch": 64.06, + "learning_rate": 7.188558524651864e-06, + "loss": 0.0938, + "step": 170200 + }, + { + "epoch": 64.09, + "learning_rate": 7.181031238238616e-06, + "loss": 0.0913, + "step": 170300 + }, + { + "epoch": 64.13, + "learning_rate": 7.173503951825368e-06, + "loss": 0.0919, + "step": 170400 + }, + { + "epoch": 64.17, + "learning_rate": 7.16597666541212e-06, + "loss": 0.0949, + "step": 170500 + }, + { + "epoch": 64.21, + "learning_rate": 7.158449378998872e-06, + "loss": 0.0938, + "step": 170600 + }, + { + "epoch": 64.25, + "learning_rate": 7.150922092585624e-06, + "loss": 0.0948, + "step": 170700 + }, + { + "epoch": 64.28, + "learning_rate": 7.1433948061723755e-06, + "loss": 0.093, + "step": 170800 + }, + { + "epoch": 64.32, + "learning_rate": 7.135867519759127e-06, + "loss": 0.0933, + "step": 170900 + }, + { + "epoch": 64.36, + "learning_rate": 7.12834023334588e-06, + "loss": 0.0915, + "step": 171000 + }, + { + "epoch": 64.4, + "learning_rate": 7.120812946932631e-06, + "loss": 0.093, + "step": 171100 + }, + { + "epoch": 64.43, + "learning_rate": 7.113285660519383e-06, + "loss": 0.0933, + "step": 171200 + }, + { + "epoch": 64.47, + "learning_rate": 7.1057583741061356e-06, + "loss": 0.0936, + "step": 171300 + }, + { + "epoch": 64.51, + "learning_rate": 7.098231087692887e-06, + "loss": 0.0935, + "step": 171400 + }, + { + "epoch": 64.55, + "learning_rate": 7.090703801279639e-06, + "loss": 0.094, + "step": 171500 + }, + { + "epoch": 64.58, + "learning_rate": 7.083176514866391e-06, + "loss": 0.0941, + "step": 171600 + }, + { + "epoch": 64.62, + "learning_rate": 7.075649228453143e-06, + "loss": 0.0946, + "step": 171700 + }, + { + "epoch": 64.66, + "learning_rate": 7.068121942039895e-06, + "loss": 0.0946, + "step": 171800 + }, + { + "epoch": 64.7, + "learning_rate": 7.060594655626647e-06, + "loss": 0.0926, + "step": 171900 + }, + { + "epoch": 64.73, + "learning_rate": 7.053067369213399e-06, + "loss": 0.0946, + "step": 172000 + }, + { + "epoch": 64.77, + "learning_rate": 7.045540082800151e-06, + "loss": 0.0937, + "step": 172100 + }, + { + "epoch": 64.81, + "learning_rate": 7.038012796386903e-06, + "loss": 0.0927, + "step": 172200 + }, + { + "epoch": 64.85, + "learning_rate": 7.030485509973655e-06, + "loss": 0.0945, + "step": 172300 + }, + { + "epoch": 64.89, + "learning_rate": 7.022958223560406e-06, + "loss": 0.0918, + "step": 172400 + }, + { + "epoch": 64.92, + "learning_rate": 7.015430937147158e-06, + "loss": 0.0923, + "step": 172500 + }, + { + "epoch": 64.96, + "learning_rate": 7.0079036507339114e-06, + "loss": 0.0926, + "step": 172600 + }, + { + "epoch": 65.0, + "learning_rate": 7.000376364320663e-06, + "loss": 0.0929, + "step": 172700 + }, + { + "epoch": 65.0, + "eval_loss": 0.0930134728550911, + "eval_runtime": 44.9287, + "eval_samples_per_second": 166.931, + "eval_steps_per_second": 10.439, + "step": 172705 + }, + { + "epoch": 65.04, + "learning_rate": 6.992849077907416e-06, + "loss": 0.0929, + "step": 172800 + }, + { + "epoch": 65.07, + "learning_rate": 6.985321791494167e-06, + "loss": 0.0932, + "step": 172900 + }, + { + "epoch": 65.11, + "learning_rate": 6.977794505080919e-06, + "loss": 0.0948, + "step": 173000 + }, + { + "epoch": 65.15, + "learning_rate": 6.9702672186676715e-06, + "loss": 0.093, + "step": 173100 + }, + { + "epoch": 65.19, + "learning_rate": 6.962739932254423e-06, + "loss": 0.0947, + "step": 173200 + }, + { + "epoch": 65.22, + "learning_rate": 6.955212645841175e-06, + "loss": 0.0925, + "step": 173300 + }, + { + "epoch": 65.26, + "learning_rate": 6.947685359427927e-06, + "loss": 0.093, + "step": 173400 + }, + { + "epoch": 65.3, + "learning_rate": 6.940158073014679e-06, + "loss": 0.0931, + "step": 173500 + }, + { + "epoch": 65.34, + "learning_rate": 6.932630786601431e-06, + "loss": 0.0944, + "step": 173600 + }, + { + "epoch": 65.37, + "learning_rate": 6.925103500188183e-06, + "loss": 0.0923, + "step": 173700 + }, + { + "epoch": 65.41, + "learning_rate": 6.917576213774935e-06, + "loss": 0.0919, + "step": 173800 + }, + { + "epoch": 65.45, + "learning_rate": 6.9100489273616865e-06, + "loss": 0.0945, + "step": 173900 + }, + { + "epoch": 65.49, + "learning_rate": 6.902521640948438e-06, + "loss": 0.0924, + "step": 174000 + }, + { + "epoch": 65.53, + "learning_rate": 6.894994354535191e-06, + "loss": 0.0941, + "step": 174100 + }, + { + "epoch": 65.56, + "learning_rate": 6.887467068121942e-06, + "loss": 0.0918, + "step": 174200 + }, + { + "epoch": 65.6, + "learning_rate": 6.879939781708694e-06, + "loss": 0.0935, + "step": 174300 + }, + { + "epoch": 65.64, + "learning_rate": 6.8724124952954465e-06, + "loss": 0.0944, + "step": 174400 + }, + { + "epoch": 65.68, + "learning_rate": 6.864885208882198e-06, + "loss": 0.0924, + "step": 174500 + }, + { + "epoch": 65.71, + "learning_rate": 6.85735792246895e-06, + "loss": 0.0927, + "step": 174600 + }, + { + "epoch": 65.75, + "learning_rate": 6.849830636055702e-06, + "loss": 0.094, + "step": 174700 + }, + { + "epoch": 65.79, + "learning_rate": 6.842303349642454e-06, + "loss": 0.0935, + "step": 174800 + }, + { + "epoch": 65.83, + "learning_rate": 6.834776063229206e-06, + "loss": 0.0927, + "step": 174900 + }, + { + "epoch": 65.86, + "learning_rate": 6.827248776815958e-06, + "loss": 0.0937, + "step": 175000 + }, + { + "epoch": 65.9, + "learning_rate": 6.819721490402711e-06, + "loss": 0.0938, + "step": 175100 + }, + { + "epoch": 65.94, + "learning_rate": 6.812194203989463e-06, + "loss": 0.0931, + "step": 175200 + }, + { + "epoch": 65.98, + "learning_rate": 6.804666917576215e-06, + "loss": 0.0917, + "step": 175300 + }, + { + "epoch": 66.0, + "eval_loss": 0.09251850843429565, + "eval_runtime": 44.9106, + "eval_samples_per_second": 166.998, + "eval_steps_per_second": 10.443, + "step": 175362 + }, + { + "epoch": 66.01, + "learning_rate": 6.797139631162967e-06, + "loss": 0.0936, + "step": 175400 + }, + { + "epoch": 66.05, + "learning_rate": 6.789612344749718e-06, + "loss": 0.0948, + "step": 175500 + }, + { + "epoch": 66.09, + "learning_rate": 6.782085058336471e-06, + "loss": 0.0945, + "step": 175600 + }, + { + "epoch": 66.13, + "learning_rate": 6.7745577719232224e-06, + "loss": 0.0937, + "step": 175700 + }, + { + "epoch": 66.16, + "learning_rate": 6.767030485509974e-06, + "loss": 0.0945, + "step": 175800 + }, + { + "epoch": 66.2, + "learning_rate": 6.759503199096727e-06, + "loss": 0.0932, + "step": 175900 + }, + { + "epoch": 66.24, + "learning_rate": 6.751975912683478e-06, + "loss": 0.0936, + "step": 176000 + }, + { + "epoch": 66.28, + "learning_rate": 6.74444862627023e-06, + "loss": 0.0933, + "step": 176100 + }, + { + "epoch": 66.32, + "learning_rate": 6.7369213398569825e-06, + "loss": 0.0926, + "step": 176200 + }, + { + "epoch": 66.35, + "learning_rate": 6.729394053443734e-06, + "loss": 0.093, + "step": 176300 + }, + { + "epoch": 66.39, + "learning_rate": 6.721866767030486e-06, + "loss": 0.0929, + "step": 176400 + }, + { + "epoch": 66.43, + "learning_rate": 6.714339480617238e-06, + "loss": 0.0934, + "step": 176500 + }, + { + "epoch": 66.47, + "learning_rate": 6.70681219420399e-06, + "loss": 0.0936, + "step": 176600 + }, + { + "epoch": 66.5, + "learning_rate": 6.699284907790742e-06, + "loss": 0.0916, + "step": 176700 + }, + { + "epoch": 66.54, + "learning_rate": 6.691757621377494e-06, + "loss": 0.0921, + "step": 176800 + }, + { + "epoch": 66.58, + "learning_rate": 6.684230334964246e-06, + "loss": 0.094, + "step": 176900 + }, + { + "epoch": 66.62, + "learning_rate": 6.6767030485509975e-06, + "loss": 0.0915, + "step": 177000 + }, + { + "epoch": 66.65, + "learning_rate": 6.669175762137749e-06, + "loss": 0.0919, + "step": 177100 + }, + { + "epoch": 66.69, + "learning_rate": 6.661648475724502e-06, + "loss": 0.0936, + "step": 177200 + }, + { + "epoch": 66.73, + "learning_rate": 6.654121189311253e-06, + "loss": 0.0927, + "step": 177300 + }, + { + "epoch": 66.77, + "learning_rate": 6.646593902898005e-06, + "loss": 0.0921, + "step": 177400 + }, + { + "epoch": 66.8, + "learning_rate": 6.6390666164847575e-06, + "loss": 0.0929, + "step": 177500 + }, + { + "epoch": 66.84, + "learning_rate": 6.63153933007151e-06, + "loss": 0.0934, + "step": 177600 + }, + { + "epoch": 66.88, + "learning_rate": 6.6240120436582625e-06, + "loss": 0.0932, + "step": 177700 + }, + { + "epoch": 66.92, + "learning_rate": 6.616484757245014e-06, + "loss": 0.0944, + "step": 177800 + }, + { + "epoch": 66.96, + "learning_rate": 6.608957470831766e-06, + "loss": 0.092, + "step": 177900 + }, + { + "epoch": 66.99, + "learning_rate": 6.601430184418518e-06, + "loss": 0.0948, + "step": 178000 + }, + { + "epoch": 67.0, + "eval_loss": 0.09316383302211761, + "eval_runtime": 44.8531, + "eval_samples_per_second": 167.212, + "eval_steps_per_second": 10.456, + "step": 178019 + }, + { + "epoch": 67.03, + "learning_rate": 6.59390289800527e-06, + "loss": 0.0931, + "step": 178100 + }, + { + "epoch": 67.07, + "learning_rate": 6.586375611592022e-06, + "loss": 0.0929, + "step": 178200 + }, + { + "epoch": 67.11, + "learning_rate": 6.578848325178774e-06, + "loss": 0.0933, + "step": 178300 + }, + { + "epoch": 67.14, + "learning_rate": 6.571321038765526e-06, + "loss": 0.0909, + "step": 178400 + }, + { + "epoch": 67.18, + "learning_rate": 6.5637937523522776e-06, + "loss": 0.093, + "step": 178500 + }, + { + "epoch": 67.22, + "learning_rate": 6.556266465939029e-06, + "loss": 0.0942, + "step": 178600 + }, + { + "epoch": 67.26, + "learning_rate": 6.548739179525782e-06, + "loss": 0.0926, + "step": 178700 + }, + { + "epoch": 67.29, + "learning_rate": 6.541211893112533e-06, + "loss": 0.0921, + "step": 178800 + }, + { + "epoch": 67.33, + "learning_rate": 6.533684606699285e-06, + "loss": 0.0932, + "step": 178900 + }, + { + "epoch": 67.37, + "learning_rate": 6.526157320286038e-06, + "loss": 0.0939, + "step": 179000 + }, + { + "epoch": 67.41, + "learning_rate": 6.518630033872789e-06, + "loss": 0.0943, + "step": 179100 + }, + { + "epoch": 67.44, + "learning_rate": 6.511102747459541e-06, + "loss": 0.0925, + "step": 179200 + }, + { + "epoch": 67.48, + "learning_rate": 6.5035754610462934e-06, + "loss": 0.092, + "step": 179300 + }, + { + "epoch": 67.52, + "learning_rate": 6.496048174633045e-06, + "loss": 0.093, + "step": 179400 + }, + { + "epoch": 67.56, + "learning_rate": 6.488520888219797e-06, + "loss": 0.0942, + "step": 179500 + }, + { + "epoch": 67.6, + "learning_rate": 6.480993601806549e-06, + "loss": 0.0933, + "step": 179600 + }, + { + "epoch": 67.63, + "learning_rate": 6.473466315393301e-06, + "loss": 0.0921, + "step": 179700 + }, + { + "epoch": 67.67, + "learning_rate": 6.465939028980053e-06, + "loss": 0.0928, + "step": 179800 + }, + { + "epoch": 67.71, + "learning_rate": 6.458411742566805e-06, + "loss": 0.0945, + "step": 179900 + }, + { + "epoch": 67.75, + "learning_rate": 6.450884456153557e-06, + "loss": 0.0925, + "step": 180000 + }, + { + "epoch": 67.78, + "learning_rate": 6.443357169740309e-06, + "loss": 0.0912, + "step": 180100 + }, + { + "epoch": 67.82, + "learning_rate": 6.435829883327062e-06, + "loss": 0.0917, + "step": 180200 + }, + { + "epoch": 67.86, + "learning_rate": 6.4283025969138135e-06, + "loss": 0.093, + "step": 180300 + }, + { + "epoch": 67.9, + "learning_rate": 6.420775310500565e-06, + "loss": 0.0928, + "step": 180400 + }, + { + "epoch": 67.93, + "learning_rate": 6.413248024087318e-06, + "loss": 0.0915, + "step": 180500 + }, + { + "epoch": 67.97, + "learning_rate": 6.405720737674069e-06, + "loss": 0.0931, + "step": 180600 + }, + { + "epoch": 68.0, + "eval_loss": 0.09266681969165802, + "eval_runtime": 44.9069, + "eval_samples_per_second": 167.012, + "eval_steps_per_second": 10.444, + "step": 180676 + }, + { + "epoch": 68.01, + "learning_rate": 6.398193451260821e-06, + "loss": 0.0939, + "step": 180700 + }, + { + "epoch": 68.05, + "learning_rate": 6.3906661648475735e-06, + "loss": 0.0933, + "step": 180800 + }, + { + "epoch": 68.08, + "learning_rate": 6.383138878434325e-06, + "loss": 0.0922, + "step": 180900 + }, + { + "epoch": 68.12, + "learning_rate": 6.375611592021077e-06, + "loss": 0.0922, + "step": 181000 + }, + { + "epoch": 68.16, + "learning_rate": 6.368084305607829e-06, + "loss": 0.0935, + "step": 181100 + }, + { + "epoch": 68.2, + "learning_rate": 6.360557019194581e-06, + "loss": 0.0938, + "step": 181200 + }, + { + "epoch": 68.23, + "learning_rate": 6.353029732781333e-06, + "loss": 0.0929, + "step": 181300 + }, + { + "epoch": 68.27, + "learning_rate": 6.345502446368084e-06, + "loss": 0.093, + "step": 181400 + }, + { + "epoch": 68.31, + "learning_rate": 6.337975159954837e-06, + "loss": 0.0932, + "step": 181500 + }, + { + "epoch": 68.35, + "learning_rate": 6.3304478735415885e-06, + "loss": 0.0914, + "step": 181600 + }, + { + "epoch": 68.39, + "learning_rate": 6.32292058712834e-06, + "loss": 0.0926, + "step": 181700 + }, + { + "epoch": 68.42, + "learning_rate": 6.315393300715093e-06, + "loss": 0.0935, + "step": 181800 + }, + { + "epoch": 68.46, + "learning_rate": 6.307866014301844e-06, + "loss": 0.0923, + "step": 181900 + }, + { + "epoch": 68.5, + "learning_rate": 6.300338727888596e-06, + "loss": 0.0921, + "step": 182000 + }, + { + "epoch": 68.54, + "learning_rate": 6.2928114414753486e-06, + "loss": 0.0929, + "step": 182100 + }, + { + "epoch": 68.57, + "learning_rate": 6.2852841550621e-06, + "loss": 0.0935, + "step": 182200 + }, + { + "epoch": 68.61, + "learning_rate": 6.277756868648852e-06, + "loss": 0.0923, + "step": 182300 + }, + { + "epoch": 68.65, + "learning_rate": 6.270229582235604e-06, + "loss": 0.093, + "step": 182400 + }, + { + "epoch": 68.69, + "learning_rate": 6.262702295822356e-06, + "loss": 0.0912, + "step": 182500 + }, + { + "epoch": 68.72, + "learning_rate": 6.2551750094091094e-06, + "loss": 0.0922, + "step": 182600 + }, + { + "epoch": 68.76, + "learning_rate": 6.247647722995861e-06, + "loss": 0.0919, + "step": 182700 + }, + { + "epoch": 68.8, + "learning_rate": 6.240120436582613e-06, + "loss": 0.0933, + "step": 182800 + }, + { + "epoch": 68.84, + "learning_rate": 6.2325931501693644e-06, + "loss": 0.0936, + "step": 182900 + }, + { + "epoch": 68.87, + "learning_rate": 6.225065863756117e-06, + "loss": 0.0935, + "step": 183000 + }, + { + "epoch": 68.91, + "learning_rate": 6.217538577342869e-06, + "loss": 0.0917, + "step": 183100 + }, + { + "epoch": 68.95, + "learning_rate": 6.21001129092962e-06, + "loss": 0.0916, + "step": 183200 + }, + { + "epoch": 68.99, + "learning_rate": 6.202484004516373e-06, + "loss": 0.0911, + "step": 183300 + }, + { + "epoch": 69.0, + "eval_loss": 0.092154860496521, + "eval_runtime": 44.9385, + "eval_samples_per_second": 166.895, + "eval_steps_per_second": 10.436, + "step": 183333 + }, + { + "epoch": 69.03, + "learning_rate": 6.1949567181031245e-06, + "loss": 0.0919, + "step": 183400 + }, + { + "epoch": 69.06, + "learning_rate": 6.187429431689876e-06, + "loss": 0.0931, + "step": 183500 + }, + { + "epoch": 69.1, + "learning_rate": 6.179902145276629e-06, + "loss": 0.0923, + "step": 183600 + }, + { + "epoch": 69.14, + "learning_rate": 6.17237485886338e-06, + "loss": 0.0927, + "step": 183700 + }, + { + "epoch": 69.18, + "learning_rate": 6.164847572450132e-06, + "loss": 0.0942, + "step": 183800 + }, + { + "epoch": 69.21, + "learning_rate": 6.1573202860368845e-06, + "loss": 0.0926, + "step": 183900 + }, + { + "epoch": 69.25, + "learning_rate": 6.149792999623636e-06, + "loss": 0.0943, + "step": 184000 + }, + { + "epoch": 69.29, + "learning_rate": 6.142265713210388e-06, + "loss": 0.0918, + "step": 184100 + }, + { + "epoch": 69.33, + "learning_rate": 6.13473842679714e-06, + "loss": 0.0933, + "step": 184200 + }, + { + "epoch": 69.36, + "learning_rate": 6.127211140383892e-06, + "loss": 0.093, + "step": 184300 + }, + { + "epoch": 69.4, + "learning_rate": 6.119683853970644e-06, + "loss": 0.0927, + "step": 184400 + }, + { + "epoch": 69.44, + "learning_rate": 6.112156567557395e-06, + "loss": 0.0925, + "step": 184500 + }, + { + "epoch": 69.48, + "learning_rate": 6.104629281144148e-06, + "loss": 0.0922, + "step": 184600 + }, + { + "epoch": 69.51, + "learning_rate": 6.0971019947308995e-06, + "loss": 0.0926, + "step": 184700 + }, + { + "epoch": 69.55, + "learning_rate": 6.089574708317651e-06, + "loss": 0.0928, + "step": 184800 + }, + { + "epoch": 69.59, + "learning_rate": 6.082047421904404e-06, + "loss": 0.092, + "step": 184900 + }, + { + "epoch": 69.63, + "learning_rate": 6.074520135491156e-06, + "loss": 0.0934, + "step": 185000 + }, + { + "epoch": 69.67, + "learning_rate": 6.066992849077909e-06, + "loss": 0.0927, + "step": 185100 + }, + { + "epoch": 69.7, + "learning_rate": 6.05946556266466e-06, + "loss": 0.0923, + "step": 185200 + }, + { + "epoch": 69.74, + "learning_rate": 6.051938276251412e-06, + "loss": 0.0933, + "step": 185300 + }, + { + "epoch": 69.78, + "learning_rate": 6.0444109898381646e-06, + "loss": 0.0914, + "step": 185400 + }, + { + "epoch": 69.82, + "learning_rate": 6.036883703424916e-06, + "loss": 0.0928, + "step": 185500 + }, + { + "epoch": 69.85, + "learning_rate": 6.029356417011668e-06, + "loss": 0.0917, + "step": 185600 + }, + { + "epoch": 69.89, + "learning_rate": 6.02182913059842e-06, + "loss": 0.0932, + "step": 185700 + }, + { + "epoch": 69.93, + "learning_rate": 6.014301844185172e-06, + "loss": 0.0917, + "step": 185800 + }, + { + "epoch": 69.97, + "learning_rate": 6.006774557771924e-06, + "loss": 0.0923, + "step": 185900 + }, + { + "epoch": 70.0, + "eval_loss": 0.0924314558506012, + "eval_runtime": 45.1886, + "eval_samples_per_second": 165.971, + "eval_steps_per_second": 10.379, + "step": 185990 + }, + { + "epoch": 70.0, + "learning_rate": 5.999247271358675e-06, + "loss": 0.0933, + "step": 186000 + }, + { + "epoch": 70.04, + "learning_rate": 5.991719984945428e-06, + "loss": 0.0918, + "step": 186100 + }, + { + "epoch": 70.08, + "learning_rate": 5.98419269853218e-06, + "loss": 0.0919, + "step": 186200 + }, + { + "epoch": 70.12, + "learning_rate": 5.976665412118931e-06, + "loss": 0.0942, + "step": 186300 + }, + { + "epoch": 70.15, + "learning_rate": 5.969138125705684e-06, + "loss": 0.092, + "step": 186400 + }, + { + "epoch": 70.19, + "learning_rate": 5.9616108392924354e-06, + "loss": 0.0923, + "step": 186500 + }, + { + "epoch": 70.23, + "learning_rate": 5.954083552879187e-06, + "loss": 0.0929, + "step": 186600 + }, + { + "epoch": 70.27, + "learning_rate": 5.94655626646594e-06, + "loss": 0.0932, + "step": 186700 + }, + { + "epoch": 70.3, + "learning_rate": 5.939028980052691e-06, + "loss": 0.0936, + "step": 186800 + }, + { + "epoch": 70.34, + "learning_rate": 5.931501693639443e-06, + "loss": 0.0931, + "step": 186900 + }, + { + "epoch": 70.38, + "learning_rate": 5.9239744072261955e-06, + "loss": 0.0919, + "step": 187000 + }, + { + "epoch": 70.42, + "learning_rate": 5.916447120812947e-06, + "loss": 0.092, + "step": 187100 + }, + { + "epoch": 70.46, + "learning_rate": 5.908919834399699e-06, + "loss": 0.0949, + "step": 187200 + }, + { + "epoch": 70.49, + "learning_rate": 5.901392547986451e-06, + "loss": 0.0931, + "step": 187300 + }, + { + "epoch": 70.53, + "learning_rate": 5.893865261573203e-06, + "loss": 0.0927, + "step": 187400 + }, + { + "epoch": 70.57, + "learning_rate": 5.8863379751599555e-06, + "loss": 0.0918, + "step": 187500 + }, + { + "epoch": 70.61, + "learning_rate": 5.878810688746708e-06, + "loss": 0.0916, + "step": 187600 + }, + { + "epoch": 70.64, + "learning_rate": 5.87128340233346e-06, + "loss": 0.0926, + "step": 187700 + }, + { + "epoch": 70.68, + "learning_rate": 5.863756115920211e-06, + "loss": 0.0913, + "step": 187800 + }, + { + "epoch": 70.72, + "learning_rate": 5.856228829506964e-06, + "loss": 0.093, + "step": 187900 + }, + { + "epoch": 70.76, + "learning_rate": 5.8487015430937155e-06, + "loss": 0.091, + "step": 188000 + }, + { + "epoch": 70.79, + "learning_rate": 5.841174256680467e-06, + "loss": 0.0941, + "step": 188100 + }, + { + "epoch": 70.83, + "learning_rate": 5.83364697026722e-06, + "loss": 0.0933, + "step": 188200 + }, + { + "epoch": 70.87, + "learning_rate": 5.826119683853971e-06, + "loss": 0.0916, + "step": 188300 + }, + { + "epoch": 70.91, + "learning_rate": 5.818592397440723e-06, + "loss": 0.0946, + "step": 188400 + }, + { + "epoch": 70.94, + "learning_rate": 5.8110651110274755e-06, + "loss": 0.0927, + "step": 188500 + }, + { + "epoch": 70.98, + "learning_rate": 5.803537824614227e-06, + "loss": 0.0923, + "step": 188600 + }, + { + "epoch": 71.0, + "eval_loss": 0.09234917163848877, + "eval_runtime": 45.0733, + "eval_samples_per_second": 166.395, + "eval_steps_per_second": 10.405, + "step": 188647 + }, + { + "epoch": 71.02, + "learning_rate": 5.796010538200979e-06, + "loss": 0.0929, + "step": 188700 + }, + { + "epoch": 71.06, + "learning_rate": 5.788483251787731e-06, + "loss": 0.0928, + "step": 188800 + }, + { + "epoch": 71.1, + "learning_rate": 5.780955965374483e-06, + "loss": 0.0925, + "step": 188900 + }, + { + "epoch": 71.13, + "learning_rate": 5.773428678961235e-06, + "loss": 0.0928, + "step": 189000 + }, + { + "epoch": 71.17, + "learning_rate": 5.765901392547986e-06, + "loss": 0.0943, + "step": 189100 + }, + { + "epoch": 71.21, + "learning_rate": 5.758374106134739e-06, + "loss": 0.092, + "step": 189200 + }, + { + "epoch": 71.25, + "learning_rate": 5.7508468197214906e-06, + "loss": 0.0928, + "step": 189300 + }, + { + "epoch": 71.28, + "learning_rate": 5.743319533308242e-06, + "loss": 0.0917, + "step": 189400 + }, + { + "epoch": 71.32, + "learning_rate": 5.735792246894995e-06, + "loss": 0.0919, + "step": 189500 + }, + { + "epoch": 71.36, + "learning_rate": 5.728264960481746e-06, + "loss": 0.0922, + "step": 189600 + }, + { + "epoch": 71.4, + "learning_rate": 5.720737674068498e-06, + "loss": 0.0931, + "step": 189700 + }, + { + "epoch": 71.43, + "learning_rate": 5.713210387655251e-06, + "loss": 0.0922, + "step": 189800 + }, + { + "epoch": 71.47, + "learning_rate": 5.705683101242002e-06, + "loss": 0.0908, + "step": 189900 + }, + { + "epoch": 71.51, + "learning_rate": 5.698155814828756e-06, + "loss": 0.0909, + "step": 190000 + }, + { + "epoch": 71.55, + "learning_rate": 5.690628528415507e-06, + "loss": 0.0922, + "step": 190100 + }, + { + "epoch": 71.58, + "learning_rate": 5.683101242002259e-06, + "loss": 0.0941, + "step": 190200 + }, + { + "epoch": 71.62, + "learning_rate": 5.675573955589011e-06, + "loss": 0.0931, + "step": 190300 + }, + { + "epoch": 71.66, + "learning_rate": 5.668046669175763e-06, + "loss": 0.092, + "step": 190400 + }, + { + "epoch": 71.7, + "learning_rate": 5.660519382762515e-06, + "loss": 0.0903, + "step": 190500 + }, + { + "epoch": 71.74, + "learning_rate": 5.6529920963492665e-06, + "loss": 0.0917, + "step": 190600 + }, + { + "epoch": 71.77, + "learning_rate": 5.645464809936019e-06, + "loss": 0.0911, + "step": 190700 + }, + { + "epoch": 71.81, + "learning_rate": 5.637937523522771e-06, + "loss": 0.0924, + "step": 190800 + }, + { + "epoch": 71.85, + "learning_rate": 5.630410237109522e-06, + "loss": 0.0922, + "step": 190900 + }, + { + "epoch": 71.89, + "learning_rate": 5.622882950696275e-06, + "loss": 0.0935, + "step": 191000 + }, + { + "epoch": 71.92, + "learning_rate": 5.6153556642830265e-06, + "loss": 0.0912, + "step": 191100 + }, + { + "epoch": 71.96, + "learning_rate": 5.607828377869778e-06, + "loss": 0.0919, + "step": 191200 + }, + { + "epoch": 72.0, + "learning_rate": 5.600301091456531e-06, + "loss": 0.0929, + "step": 191300 + }, + { + "epoch": 72.0, + "eval_loss": 0.09194895625114441, + "eval_runtime": 45.1877, + "eval_samples_per_second": 165.975, + "eval_steps_per_second": 10.379, + "step": 191304 + }, + { + "epoch": 72.04, + "learning_rate": 5.592773805043282e-06, + "loss": 0.0934, + "step": 191400 + }, + { + "epoch": 72.07, + "learning_rate": 5.585246518630034e-06, + "loss": 0.0922, + "step": 191500 + }, + { + "epoch": 72.11, + "learning_rate": 5.5777192322167865e-06, + "loss": 0.0934, + "step": 191600 + }, + { + "epoch": 72.15, + "learning_rate": 5.570191945803538e-06, + "loss": 0.0902, + "step": 191700 + }, + { + "epoch": 72.19, + "learning_rate": 5.56266465939029e-06, + "loss": 0.0921, + "step": 191800 + }, + { + "epoch": 72.22, + "learning_rate": 5.5551373729770415e-06, + "loss": 0.0922, + "step": 191900 + }, + { + "epoch": 72.26, + "learning_rate": 5.547610086563794e-06, + "loss": 0.0925, + "step": 192000 + }, + { + "epoch": 72.3, + "learning_rate": 5.540082800150546e-06, + "loss": 0.0927, + "step": 192100 + }, + { + "epoch": 72.34, + "learning_rate": 5.532555513737297e-06, + "loss": 0.0923, + "step": 192200 + }, + { + "epoch": 72.37, + "learning_rate": 5.52502822732405e-06, + "loss": 0.0924, + "step": 192300 + }, + { + "epoch": 72.41, + "learning_rate": 5.5175009409108015e-06, + "loss": 0.0919, + "step": 192400 + }, + { + "epoch": 72.45, + "learning_rate": 5.509973654497555e-06, + "loss": 0.0937, + "step": 192500 + }, + { + "epoch": 72.49, + "learning_rate": 5.5024463680843066e-06, + "loss": 0.0919, + "step": 192600 + }, + { + "epoch": 72.53, + "learning_rate": 5.494919081671058e-06, + "loss": 0.0922, + "step": 192700 + }, + { + "epoch": 72.56, + "learning_rate": 5.487391795257811e-06, + "loss": 0.0925, + "step": 192800 + }, + { + "epoch": 72.6, + "learning_rate": 5.479864508844562e-06, + "loss": 0.0919, + "step": 192900 + }, + { + "epoch": 72.64, + "learning_rate": 5.472337222431314e-06, + "loss": 0.0908, + "step": 193000 + }, + { + "epoch": 72.68, + "learning_rate": 5.464809936018067e-06, + "loss": 0.0925, + "step": 193100 + }, + { + "epoch": 72.71, + "learning_rate": 5.457282649604818e-06, + "loss": 0.0921, + "step": 193200 + }, + { + "epoch": 72.75, + "learning_rate": 5.44975536319157e-06, + "loss": 0.0909, + "step": 193300 + }, + { + "epoch": 72.79, + "learning_rate": 5.442228076778322e-06, + "loss": 0.0938, + "step": 193400 + }, + { + "epoch": 72.83, + "learning_rate": 5.434700790365074e-06, + "loss": 0.0919, + "step": 193500 + }, + { + "epoch": 72.86, + "learning_rate": 5.427173503951826e-06, + "loss": 0.0941, + "step": 193600 + }, + { + "epoch": 72.9, + "learning_rate": 5.4196462175385774e-06, + "loss": 0.0916, + "step": 193700 + }, + { + "epoch": 72.94, + "learning_rate": 5.41211893112533e-06, + "loss": 0.0922, + "step": 193800 + }, + { + "epoch": 72.98, + "learning_rate": 5.404591644712082e-06, + "loss": 0.0916, + "step": 193900 + }, + { + "epoch": 73.0, + "eval_loss": 0.09231603145599365, + "eval_runtime": 45.0139, + "eval_samples_per_second": 166.615, + "eval_steps_per_second": 10.419, + "step": 193961 + }, + { + "epoch": 73.01, + "learning_rate": 5.397064358298833e-06, + "loss": 0.093, + "step": 194000 + }, + { + "epoch": 73.05, + "learning_rate": 5.389537071885586e-06, + "loss": 0.0926, + "step": 194100 + }, + { + "epoch": 73.09, + "learning_rate": 5.3820097854723375e-06, + "loss": 0.0935, + "step": 194200 + }, + { + "epoch": 73.13, + "learning_rate": 5.374482499059089e-06, + "loss": 0.0924, + "step": 194300 + }, + { + "epoch": 73.17, + "learning_rate": 5.366955212645842e-06, + "loss": 0.0918, + "step": 194400 + }, + { + "epoch": 73.2, + "learning_rate": 5.359427926232593e-06, + "loss": 0.0929, + "step": 194500 + }, + { + "epoch": 73.24, + "learning_rate": 5.351900639819345e-06, + "loss": 0.0932, + "step": 194600 + }, + { + "epoch": 73.28, + "learning_rate": 5.3443733534060975e-06, + "loss": 0.092, + "step": 194700 + }, + { + "epoch": 73.32, + "learning_rate": 5.336846066992849e-06, + "loss": 0.0913, + "step": 194800 + }, + { + "epoch": 73.35, + "learning_rate": 5.329318780579601e-06, + "loss": 0.093, + "step": 194900 + }, + { + "epoch": 73.39, + "learning_rate": 5.321791494166354e-06, + "loss": 0.0922, + "step": 195000 + }, + { + "epoch": 73.43, + "learning_rate": 5.314264207753106e-06, + "loss": 0.0898, + "step": 195100 + }, + { + "epoch": 73.47, + "learning_rate": 5.3067369213398575e-06, + "loss": 0.0921, + "step": 195200 + }, + { + "epoch": 73.5, + "learning_rate": 5.29920963492661e-06, + "loss": 0.0927, + "step": 195300 + }, + { + "epoch": 73.54, + "learning_rate": 5.291682348513362e-06, + "loss": 0.0932, + "step": 195400 + }, + { + "epoch": 73.58, + "learning_rate": 5.284155062100113e-06, + "loss": 0.092, + "step": 195500 + }, + { + "epoch": 73.62, + "learning_rate": 5.276627775686866e-06, + "loss": 0.091, + "step": 195600 + }, + { + "epoch": 73.65, + "learning_rate": 5.2691004892736175e-06, + "loss": 0.093, + "step": 195700 + }, + { + "epoch": 73.69, + "learning_rate": 5.261573202860369e-06, + "loss": 0.0943, + "step": 195800 + }, + { + "epoch": 73.73, + "learning_rate": 5.254045916447122e-06, + "loss": 0.0913, + "step": 195900 + }, + { + "epoch": 73.77, + "learning_rate": 5.246518630033873e-06, + "loss": 0.0905, + "step": 196000 + }, + { + "epoch": 73.81, + "learning_rate": 5.238991343620625e-06, + "loss": 0.0905, + "step": 196100 + }, + { + "epoch": 73.84, + "learning_rate": 5.2314640572073776e-06, + "loss": 0.0923, + "step": 196200 + }, + { + "epoch": 73.88, + "learning_rate": 5.223936770794129e-06, + "loss": 0.0923, + "step": 196300 + }, + { + "epoch": 73.92, + "learning_rate": 5.216409484380881e-06, + "loss": 0.0927, + "step": 196400 + }, + { + "epoch": 73.96, + "learning_rate": 5.2088821979676326e-06, + "loss": 0.093, + "step": 196500 + }, + { + "epoch": 73.99, + "learning_rate": 5.201354911554385e-06, + "loss": 0.0927, + "step": 196600 + }, + { + "epoch": 74.0, + "eval_loss": 0.0920698270201683, + "eval_runtime": 45.1732, + "eval_samples_per_second": 166.028, + "eval_steps_per_second": 10.382, + "step": 196618 + }, + { + "epoch": 74.03, + "learning_rate": 5.193827625141137e-06, + "loss": 0.0911, + "step": 196700 + }, + { + "epoch": 74.07, + "learning_rate": 5.186300338727888e-06, + "loss": 0.0921, + "step": 196800 + }, + { + "epoch": 74.11, + "learning_rate": 5.178773052314641e-06, + "loss": 0.0924, + "step": 196900 + }, + { + "epoch": 74.14, + "learning_rate": 5.171245765901393e-06, + "loss": 0.0937, + "step": 197000 + }, + { + "epoch": 74.18, + "learning_rate": 5.163718479488144e-06, + "loss": 0.0927, + "step": 197100 + }, + { + "epoch": 74.22, + "learning_rate": 5.156191193074897e-06, + "loss": 0.0932, + "step": 197200 + }, + { + "epoch": 74.26, + "learning_rate": 5.1486639066616484e-06, + "loss": 0.091, + "step": 197300 + }, + { + "epoch": 74.29, + "learning_rate": 5.1411366202484e-06, + "loss": 0.0914, + "step": 197400 + }, + { + "epoch": 74.33, + "learning_rate": 5.1336093338351535e-06, + "loss": 0.0921, + "step": 197500 + }, + { + "epoch": 74.37, + "learning_rate": 5.126082047421905e-06, + "loss": 0.0932, + "step": 197600 + }, + { + "epoch": 74.41, + "learning_rate": 5.118554761008658e-06, + "loss": 0.0928, + "step": 197700 + }, + { + "epoch": 74.44, + "learning_rate": 5.111027474595409e-06, + "loss": 0.0918, + "step": 197800 + }, + { + "epoch": 74.48, + "learning_rate": 5.103500188182161e-06, + "loss": 0.0894, + "step": 197900 + }, + { + "epoch": 74.52, + "learning_rate": 5.095972901768913e-06, + "loss": 0.091, + "step": 198000 + }, + { + "epoch": 74.56, + "learning_rate": 5.088445615355665e-06, + "loss": 0.0929, + "step": 198100 + }, + { + "epoch": 74.6, + "learning_rate": 5.080918328942417e-06, + "loss": 0.0931, + "step": 198200 + }, + { + "epoch": 74.63, + "learning_rate": 5.0733910425291685e-06, + "loss": 0.092, + "step": 198300 + }, + { + "epoch": 74.67, + "learning_rate": 5.065863756115921e-06, + "loss": 0.0916, + "step": 198400 + }, + { + "epoch": 74.71, + "learning_rate": 5.058336469702673e-06, + "loss": 0.0914, + "step": 198500 + }, + { + "epoch": 74.75, + "learning_rate": 5.050809183289424e-06, + "loss": 0.0929, + "step": 198600 + }, + { + "epoch": 74.78, + "learning_rate": 5.043281896876177e-06, + "loss": 0.0913, + "step": 198700 + }, + { + "epoch": 74.82, + "learning_rate": 5.0357546104629285e-06, + "loss": 0.0918, + "step": 198800 + }, + { + "epoch": 74.86, + "learning_rate": 5.02822732404968e-06, + "loss": 0.0922, + "step": 198900 + }, + { + "epoch": 74.9, + "learning_rate": 5.020700037636433e-06, + "loss": 0.0918, + "step": 199000 + }, + { + "epoch": 74.93, + "learning_rate": 5.013172751223184e-06, + "loss": 0.0918, + "step": 199100 + }, + { + "epoch": 74.97, + "learning_rate": 5.005645464809936e-06, + "loss": 0.0907, + "step": 199200 + }, + { + "epoch": 75.0, + "eval_loss": 0.09217877686023712, + "eval_runtime": 44.7295, + "eval_samples_per_second": 167.675, + "eval_steps_per_second": 10.485, + "step": 199275 + }, + { + "epoch": 75.01, + "learning_rate": 4.9981181783966885e-06, + "loss": 0.0917, + "step": 199300 + }, + { + "epoch": 75.05, + "learning_rate": 4.99059089198344e-06, + "loss": 0.0921, + "step": 199400 + }, + { + "epoch": 75.08, + "learning_rate": 4.983063605570193e-06, + "loss": 0.0925, + "step": 199500 + }, + { + "epoch": 75.12, + "learning_rate": 4.975536319156944e-06, + "loss": 0.0928, + "step": 199600 + }, + { + "epoch": 75.16, + "learning_rate": 4.968009032743696e-06, + "loss": 0.092, + "step": 199700 + }, + { + "epoch": 75.2, + "learning_rate": 4.9604817463304486e-06, + "loss": 0.0913, + "step": 199800 + }, + { + "epoch": 75.24, + "learning_rate": 4.9529544599172e-06, + "loss": 0.0928, + "step": 199900 + }, + { + "epoch": 75.27, + "learning_rate": 4.945427173503952e-06, + "loss": 0.0917, + "step": 200000 + }, + { + "epoch": 75.31, + "learning_rate": 4.937899887090704e-06, + "loss": 0.0928, + "step": 200100 + }, + { + "epoch": 75.35, + "learning_rate": 4.930372600677456e-06, + "loss": 0.0919, + "step": 200200 + }, + { + "epoch": 75.39, + "learning_rate": 4.922845314264208e-06, + "loss": 0.0919, + "step": 200300 + }, + { + "epoch": 75.42, + "learning_rate": 4.91531802785096e-06, + "loss": 0.092, + "step": 200400 + }, + { + "epoch": 75.46, + "learning_rate": 4.907790741437712e-06, + "loss": 0.0902, + "step": 200500 + }, + { + "epoch": 75.5, + "learning_rate": 4.9002634550244644e-06, + "loss": 0.0911, + "step": 200600 + }, + { + "epoch": 75.54, + "learning_rate": 4.892736168611216e-06, + "loss": 0.093, + "step": 200700 + }, + { + "epoch": 75.57, + "learning_rate": 4.885208882197968e-06, + "loss": 0.0917, + "step": 200800 + }, + { + "epoch": 75.61, + "learning_rate": 4.87768159578472e-06, + "loss": 0.0911, + "step": 200900 + }, + { + "epoch": 75.65, + "learning_rate": 4.870154309371472e-06, + "loss": 0.0927, + "step": 201000 + }, + { + "epoch": 75.69, + "learning_rate": 4.862627022958224e-06, + "loss": 0.0917, + "step": 201100 + }, + { + "epoch": 75.72, + "learning_rate": 4.855099736544976e-06, + "loss": 0.0936, + "step": 201200 + }, + { + "epoch": 75.76, + "learning_rate": 4.847572450131728e-06, + "loss": 0.0938, + "step": 201300 + }, + { + "epoch": 75.8, + "learning_rate": 4.8400451637184795e-06, + "loss": 0.0896, + "step": 201400 + }, + { + "epoch": 75.84, + "learning_rate": 4.832517877305232e-06, + "loss": 0.0934, + "step": 201500 + }, + { + "epoch": 75.88, + "learning_rate": 4.824990590891984e-06, + "loss": 0.0902, + "step": 201600 + }, + { + "epoch": 75.91, + "learning_rate": 4.817463304478735e-06, + "loss": 0.0906, + "step": 201700 + }, + { + "epoch": 75.95, + "learning_rate": 4.809936018065488e-06, + "loss": 0.0906, + "step": 201800 + }, + { + "epoch": 75.99, + "learning_rate": 4.80240873165224e-06, + "loss": 0.0927, + "step": 201900 + }, + { + "epoch": 76.0, + "eval_loss": 0.09185120463371277, + "eval_runtime": 44.8491, + "eval_samples_per_second": 167.228, + "eval_steps_per_second": 10.457, + "step": 201932 + }, + { + "epoch": 76.03, + "learning_rate": 4.794881445238992e-06, + "loss": 0.0924, + "step": 202000 + }, + { + "epoch": 76.06, + "learning_rate": 4.787354158825744e-06, + "loss": 0.0922, + "step": 202100 + }, + { + "epoch": 76.1, + "learning_rate": 4.779826872412496e-06, + "loss": 0.0933, + "step": 202200 + }, + { + "epoch": 76.14, + "learning_rate": 4.772299585999248e-06, + "loss": 0.0922, + "step": 202300 + }, + { + "epoch": 76.18, + "learning_rate": 4.7647722995859995e-06, + "loss": 0.0916, + "step": 202400 + }, + { + "epoch": 76.21, + "learning_rate": 4.757245013172751e-06, + "loss": 0.0916, + "step": 202500 + }, + { + "epoch": 76.25, + "learning_rate": 4.749717726759504e-06, + "loss": 0.0931, + "step": 202600 + }, + { + "epoch": 76.29, + "learning_rate": 4.742190440346255e-06, + "loss": 0.0907, + "step": 202700 + }, + { + "epoch": 76.33, + "learning_rate": 4.734663153933007e-06, + "loss": 0.092, + "step": 202800 + }, + { + "epoch": 76.36, + "learning_rate": 4.7271358675197595e-06, + "loss": 0.0912, + "step": 202900 + }, + { + "epoch": 76.4, + "learning_rate": 4.719608581106512e-06, + "loss": 0.0924, + "step": 203000 + }, + { + "epoch": 76.44, + "learning_rate": 4.712081294693264e-06, + "loss": 0.0915, + "step": 203100 + }, + { + "epoch": 76.48, + "learning_rate": 4.704554008280015e-06, + "loss": 0.0908, + "step": 203200 + }, + { + "epoch": 76.51, + "learning_rate": 4.697026721866768e-06, + "loss": 0.0919, + "step": 203300 + }, + { + "epoch": 76.55, + "learning_rate": 4.6894994354535196e-06, + "loss": 0.0912, + "step": 203400 + }, + { + "epoch": 76.59, + "learning_rate": 4.681972149040271e-06, + "loss": 0.092, + "step": 203500 + }, + { + "epoch": 76.63, + "learning_rate": 4.674444862627024e-06, + "loss": 0.091, + "step": 203600 + }, + { + "epoch": 76.67, + "learning_rate": 4.666917576213775e-06, + "loss": 0.0928, + "step": 203700 + }, + { + "epoch": 76.7, + "learning_rate": 4.659390289800527e-06, + "loss": 0.0902, + "step": 203800 + }, + { + "epoch": 76.74, + "learning_rate": 4.651863003387279e-06, + "loss": 0.0921, + "step": 203900 + }, + { + "epoch": 76.78, + "learning_rate": 4.644335716974031e-06, + "loss": 0.0899, + "step": 204000 + }, + { + "epoch": 76.82, + "learning_rate": 4.636808430560783e-06, + "loss": 0.0897, + "step": 204100 + }, + { + "epoch": 76.85, + "learning_rate": 4.629281144147535e-06, + "loss": 0.0909, + "step": 204200 + }, + { + "epoch": 76.89, + "learning_rate": 4.621753857734287e-06, + "loss": 0.0933, + "step": 204300 + }, + { + "epoch": 76.93, + "learning_rate": 4.61422657132104e-06, + "loss": 0.0937, + "step": 204400 + }, + { + "epoch": 76.97, + "learning_rate": 4.606699284907791e-06, + "loss": 0.0925, + "step": 204500 + }, + { + "epoch": 77.0, + "eval_loss": 0.09133084863424301, + "eval_runtime": 45.172, + "eval_samples_per_second": 166.032, + "eval_steps_per_second": 10.383, + "step": 204589 + }, + { + "epoch": 77.0, + "learning_rate": 4.599171998494543e-06, + "loss": 0.0911, + "step": 204600 + }, + { + "epoch": 77.04, + "learning_rate": 4.5916447120812955e-06, + "loss": 0.0936, + "step": 204700 + }, + { + "epoch": 77.08, + "learning_rate": 4.584117425668047e-06, + "loss": 0.0913, + "step": 204800 + }, + { + "epoch": 77.12, + "learning_rate": 4.576590139254799e-06, + "loss": 0.0911, + "step": 204900 + }, + { + "epoch": 77.15, + "learning_rate": 4.569062852841551e-06, + "loss": 0.0921, + "step": 205000 + }, + { + "epoch": 77.19, + "learning_rate": 4.561535566428303e-06, + "loss": 0.0918, + "step": 205100 + }, + { + "epoch": 77.23, + "learning_rate": 4.554008280015055e-06, + "loss": 0.0918, + "step": 205200 + }, + { + "epoch": 77.27, + "learning_rate": 4.546480993601806e-06, + "loss": 0.0922, + "step": 205300 + }, + { + "epoch": 77.31, + "learning_rate": 4.538953707188559e-06, + "loss": 0.0918, + "step": 205400 + }, + { + "epoch": 77.34, + "learning_rate": 4.531426420775311e-06, + "loss": 0.0921, + "step": 205500 + }, + { + "epoch": 77.38, + "learning_rate": 4.523899134362063e-06, + "loss": 0.0906, + "step": 205600 + }, + { + "epoch": 77.42, + "learning_rate": 4.516371847948815e-06, + "loss": 0.0905, + "step": 205700 + }, + { + "epoch": 77.46, + "learning_rate": 4.508844561535567e-06, + "loss": 0.0913, + "step": 205800 + }, + { + "epoch": 77.49, + "learning_rate": 4.501317275122319e-06, + "loss": 0.0928, + "step": 205900 + }, + { + "epoch": 77.53, + "learning_rate": 4.4937899887090705e-06, + "loss": 0.0931, + "step": 206000 + }, + { + "epoch": 77.57, + "learning_rate": 4.486262702295823e-06, + "loss": 0.0925, + "step": 206100 + }, + { + "epoch": 77.61, + "learning_rate": 4.478735415882575e-06, + "loss": 0.0907, + "step": 206200 + }, + { + "epoch": 77.64, + "learning_rate": 4.471208129469326e-06, + "loss": 0.0901, + "step": 206300 + }, + { + "epoch": 77.68, + "learning_rate": 4.463680843056079e-06, + "loss": 0.0925, + "step": 206400 + }, + { + "epoch": 77.72, + "learning_rate": 4.4561535566428305e-06, + "loss": 0.0899, + "step": 206500 + }, + { + "epoch": 77.76, + "learning_rate": 4.448626270229582e-06, + "loss": 0.0911, + "step": 206600 + }, + { + "epoch": 77.79, + "learning_rate": 4.441098983816335e-06, + "loss": 0.0921, + "step": 206700 + }, + { + "epoch": 77.83, + "learning_rate": 4.433571697403086e-06, + "loss": 0.092, + "step": 206800 + }, + { + "epoch": 77.87, + "learning_rate": 4.426044410989839e-06, + "loss": 0.0925, + "step": 206900 + }, + { + "epoch": 77.91, + "learning_rate": 4.4185171245765906e-06, + "loss": 0.0904, + "step": 207000 + }, + { + "epoch": 77.95, + "learning_rate": 4.410989838163342e-06, + "loss": 0.0904, + "step": 207100 + }, + { + "epoch": 77.98, + "learning_rate": 4.403462551750095e-06, + "loss": 0.0921, + "step": 207200 + }, + { + "epoch": 78.0, + "eval_loss": 0.09170127660036087, + "eval_runtime": 44.7794, + "eval_samples_per_second": 167.488, + "eval_steps_per_second": 10.474, + "step": 207246 + }, + { + "epoch": 78.02, + "learning_rate": 4.395935265336846e-06, + "loss": 0.0911, + "step": 207300 + }, + { + "epoch": 78.06, + "learning_rate": 4.388407978923598e-06, + "loss": 0.0918, + "step": 207400 + }, + { + "epoch": 78.1, + "learning_rate": 4.380880692510351e-06, + "loss": 0.0918, + "step": 207500 + }, + { + "epoch": 78.13, + "learning_rate": 4.373353406097102e-06, + "loss": 0.0901, + "step": 207600 + }, + { + "epoch": 78.17, + "learning_rate": 4.365826119683854e-06, + "loss": 0.0909, + "step": 207700 + }, + { + "epoch": 78.21, + "learning_rate": 4.3582988332706064e-06, + "loss": 0.0924, + "step": 207800 + }, + { + "epoch": 78.25, + "learning_rate": 4.350771546857358e-06, + "loss": 0.093, + "step": 207900 + }, + { + "epoch": 78.28, + "learning_rate": 4.343244260444111e-06, + "loss": 0.0917, + "step": 208000 + }, + { + "epoch": 78.32, + "learning_rate": 4.335716974030862e-06, + "loss": 0.0918, + "step": 208100 + }, + { + "epoch": 78.36, + "learning_rate": 4.328189687617615e-06, + "loss": 0.0917, + "step": 208200 + }, + { + "epoch": 78.4, + "learning_rate": 4.3206624012043665e-06, + "loss": 0.0918, + "step": 208300 + }, + { + "epoch": 78.43, + "learning_rate": 4.313135114791118e-06, + "loss": 0.0936, + "step": 208400 + }, + { + "epoch": 78.47, + "learning_rate": 4.30560782837787e-06, + "loss": 0.092, + "step": 208500 + }, + { + "epoch": 78.51, + "learning_rate": 4.298080541964622e-06, + "loss": 0.0911, + "step": 208600 + }, + { + "epoch": 78.55, + "learning_rate": 4.290553255551374e-06, + "loss": 0.0909, + "step": 208700 + }, + { + "epoch": 78.58, + "learning_rate": 4.283025969138126e-06, + "loss": 0.0913, + "step": 208800 + }, + { + "epoch": 78.62, + "learning_rate": 4.275498682724878e-06, + "loss": 0.0902, + "step": 208900 + }, + { + "epoch": 78.66, + "learning_rate": 4.26797139631163e-06, + "loss": 0.0917, + "step": 209000 + }, + { + "epoch": 78.7, + "learning_rate": 4.2604441098983815e-06, + "loss": 0.092, + "step": 209100 + }, + { + "epoch": 78.74, + "learning_rate": 4.252916823485134e-06, + "loss": 0.0927, + "step": 209200 + }, + { + "epoch": 78.77, + "learning_rate": 4.2453895370718865e-06, + "loss": 0.093, + "step": 209300 + }, + { + "epoch": 78.81, + "learning_rate": 4.237862250658638e-06, + "loss": 0.0923, + "step": 209400 + }, + { + "epoch": 78.85, + "learning_rate": 4.23033496424539e-06, + "loss": 0.0911, + "step": 209500 + }, + { + "epoch": 78.89, + "learning_rate": 4.222807677832142e-06, + "loss": 0.0914, + "step": 209600 + }, + { + "epoch": 78.92, + "learning_rate": 4.215280391418894e-06, + "loss": 0.092, + "step": 209700 + }, + { + "epoch": 78.96, + "learning_rate": 4.207753105005646e-06, + "loss": 0.0919, + "step": 209800 + }, + { + "epoch": 79.0, + "learning_rate": 4.200225818592397e-06, + "loss": 0.0895, + "step": 209900 + }, + { + "epoch": 79.0, + "eval_loss": 0.09116315096616745, + "eval_runtime": 45.3163, + "eval_samples_per_second": 165.503, + "eval_steps_per_second": 10.349, + "step": 209903 + }, + { + "epoch": 79.04, + "learning_rate": 4.19269853217915e-06, + "loss": 0.0912, + "step": 210000 + }, + { + "epoch": 79.07, + "learning_rate": 4.1851712457659015e-06, + "loss": 0.0909, + "step": 210100 + }, + { + "epoch": 79.11, + "learning_rate": 4.177643959352653e-06, + "loss": 0.0911, + "step": 210200 + }, + { + "epoch": 79.15, + "learning_rate": 4.170116672939406e-06, + "loss": 0.0925, + "step": 210300 + }, + { + "epoch": 79.19, + "learning_rate": 4.162589386526157e-06, + "loss": 0.0926, + "step": 210400 + }, + { + "epoch": 79.22, + "learning_rate": 4.15506210011291e-06, + "loss": 0.0913, + "step": 210500 + }, + { + "epoch": 79.26, + "learning_rate": 4.1475348136996616e-06, + "loss": 0.0907, + "step": 210600 + }, + { + "epoch": 79.3, + "learning_rate": 4.140007527286414e-06, + "loss": 0.0912, + "step": 210700 + }, + { + "epoch": 79.34, + "learning_rate": 4.132480240873166e-06, + "loss": 0.0906, + "step": 210800 + }, + { + "epoch": 79.38, + "learning_rate": 4.124952954459917e-06, + "loss": 0.0914, + "step": 210900 + }, + { + "epoch": 79.41, + "learning_rate": 4.11742566804667e-06, + "loss": 0.0916, + "step": 211000 + }, + { + "epoch": 79.45, + "learning_rate": 4.109898381633422e-06, + "loss": 0.0909, + "step": 211100 + }, + { + "epoch": 79.49, + "learning_rate": 4.102371095220173e-06, + "loss": 0.0917, + "step": 211200 + }, + { + "epoch": 79.53, + "learning_rate": 4.094843808806925e-06, + "loss": 0.0917, + "step": 211300 + }, + { + "epoch": 79.56, + "learning_rate": 4.0873165223936774e-06, + "loss": 0.0915, + "step": 211400 + }, + { + "epoch": 79.6, + "learning_rate": 4.079789235980429e-06, + "loss": 0.091, + "step": 211500 + }, + { + "epoch": 79.64, + "learning_rate": 4.072261949567181e-06, + "loss": 0.0915, + "step": 211600 + }, + { + "epoch": 79.68, + "learning_rate": 4.064734663153933e-06, + "loss": 0.0919, + "step": 211700 + }, + { + "epoch": 79.71, + "learning_rate": 4.057207376740686e-06, + "loss": 0.0909, + "step": 211800 + }, + { + "epoch": 79.75, + "learning_rate": 4.0496800903274375e-06, + "loss": 0.0915, + "step": 211900 + }, + { + "epoch": 79.79, + "learning_rate": 4.042152803914189e-06, + "loss": 0.0904, + "step": 212000 + }, + { + "epoch": 79.83, + "learning_rate": 4.034625517500942e-06, + "loss": 0.09, + "step": 212100 + }, + { + "epoch": 79.86, + "learning_rate": 4.027098231087693e-06, + "loss": 0.0918, + "step": 212200 + }, + { + "epoch": 79.9, + "learning_rate": 4.019570944674445e-06, + "loss": 0.0912, + "step": 212300 + }, + { + "epoch": 79.94, + "learning_rate": 4.0120436582611975e-06, + "loss": 0.0903, + "step": 212400 + }, + { + "epoch": 79.98, + "learning_rate": 4.004516371847949e-06, + "loss": 0.0916, + "step": 212500 + }, + { + "epoch": 80.0, + "eval_loss": 0.09135947376489639, + "eval_runtime": 45.1657, + "eval_samples_per_second": 166.055, + "eval_steps_per_second": 10.384, + "step": 212560 + }, + { + "epoch": 80.02, + "learning_rate": 3.996989085434701e-06, + "loss": 0.0909, + "step": 212600 + }, + { + "epoch": 80.05, + "learning_rate": 3.989461799021453e-06, + "loss": 0.0912, + "step": 212700 + }, + { + "epoch": 80.09, + "learning_rate": 3.981934512608205e-06, + "loss": 0.091, + "step": 212800 + }, + { + "epoch": 80.13, + "learning_rate": 3.974407226194957e-06, + "loss": 0.0916, + "step": 212900 + }, + { + "epoch": 80.17, + "learning_rate": 3.966879939781709e-06, + "loss": 0.0918, + "step": 213000 + }, + { + "epoch": 80.2, + "learning_rate": 3.959352653368461e-06, + "loss": 0.0927, + "step": 213100 + }, + { + "epoch": 80.24, + "learning_rate": 3.951825366955213e-06, + "loss": 0.0928, + "step": 213200 + }, + { + "epoch": 80.28, + "learning_rate": 3.944298080541965e-06, + "loss": 0.0902, + "step": 213300 + }, + { + "epoch": 80.32, + "learning_rate": 3.936770794128717e-06, + "loss": 0.0909, + "step": 213400 + }, + { + "epoch": 80.35, + "learning_rate": 3.929243507715469e-06, + "loss": 0.0909, + "step": 213500 + }, + { + "epoch": 80.39, + "learning_rate": 3.921716221302221e-06, + "loss": 0.089, + "step": 213600 + }, + { + "epoch": 80.43, + "learning_rate": 3.9141889348889725e-06, + "loss": 0.0928, + "step": 213700 + }, + { + "epoch": 80.47, + "learning_rate": 3.906661648475725e-06, + "loss": 0.0895, + "step": 213800 + }, + { + "epoch": 80.5, + "learning_rate": 3.899134362062477e-06, + "loss": 0.091, + "step": 213900 + }, + { + "epoch": 80.54, + "learning_rate": 3.891607075649228e-06, + "loss": 0.0908, + "step": 214000 + }, + { + "epoch": 80.58, + "learning_rate": 3.884079789235981e-06, + "loss": 0.0921, + "step": 214100 + }, + { + "epoch": 80.62, + "learning_rate": 3.8765525028227326e-06, + "loss": 0.0904, + "step": 214200 + }, + { + "epoch": 80.65, + "learning_rate": 3.869025216409485e-06, + "loss": 0.0914, + "step": 214300 + }, + { + "epoch": 80.69, + "learning_rate": 3.861497929996237e-06, + "loss": 0.0898, + "step": 214400 + }, + { + "epoch": 80.73, + "learning_rate": 3.853970643582988e-06, + "loss": 0.0923, + "step": 214500 + }, + { + "epoch": 80.77, + "learning_rate": 3.846443357169741e-06, + "loss": 0.0934, + "step": 214600 + }, + { + "epoch": 80.81, + "learning_rate": 3.838916070756493e-06, + "loss": 0.0914, + "step": 214700 + }, + { + "epoch": 80.84, + "learning_rate": 3.831388784343244e-06, + "loss": 0.09, + "step": 214800 + }, + { + "epoch": 80.88, + "learning_rate": 3.823861497929997e-06, + "loss": 0.091, + "step": 214900 + }, + { + "epoch": 80.92, + "learning_rate": 3.8163342115167484e-06, + "loss": 0.0907, + "step": 215000 + }, + { + "epoch": 80.96, + "learning_rate": 3.8088069251035005e-06, + "loss": 0.0913, + "step": 215100 + }, + { + "epoch": 80.99, + "learning_rate": 3.801279638690252e-06, + "loss": 0.09, + "step": 215200 + }, + { + "epoch": 81.0, + "eval_loss": 0.09087579697370529, + "eval_runtime": 45.0879, + "eval_samples_per_second": 166.342, + "eval_steps_per_second": 10.402, + "step": 215217 + }, + { + "epoch": 81.03, + "learning_rate": 3.7937523522770043e-06, + "loss": 0.0912, + "step": 215300 + }, + { + "epoch": 81.07, + "learning_rate": 3.786225065863756e-06, + "loss": 0.0922, + "step": 215400 + }, + { + "epoch": 81.11, + "learning_rate": 3.7786977794505085e-06, + "loss": 0.0913, + "step": 215500 + }, + { + "epoch": 81.14, + "learning_rate": 3.7711704930372606e-06, + "loss": 0.0917, + "step": 215600 + }, + { + "epoch": 81.18, + "learning_rate": 3.7636432066240126e-06, + "loss": 0.0921, + "step": 215700 + }, + { + "epoch": 81.22, + "learning_rate": 3.7561159202107643e-06, + "loss": 0.0913, + "step": 215800 + }, + { + "epoch": 81.26, + "learning_rate": 3.7485886337975164e-06, + "loss": 0.0918, + "step": 215900 + }, + { + "epoch": 81.29, + "learning_rate": 3.7410613473842685e-06, + "loss": 0.0917, + "step": 216000 + }, + { + "epoch": 81.33, + "learning_rate": 3.73353406097102e-06, + "loss": 0.0888, + "step": 216100 + }, + { + "epoch": 81.37, + "learning_rate": 3.7260067745577722e-06, + "loss": 0.0896, + "step": 216200 + }, + { + "epoch": 81.41, + "learning_rate": 3.718479488144524e-06, + "loss": 0.0917, + "step": 216300 + }, + { + "epoch": 81.45, + "learning_rate": 3.710952201731276e-06, + "loss": 0.0918, + "step": 216400 + }, + { + "epoch": 81.48, + "learning_rate": 3.703424915318028e-06, + "loss": 0.0908, + "step": 216500 + }, + { + "epoch": 81.52, + "learning_rate": 3.6958976289047798e-06, + "loss": 0.0921, + "step": 216600 + }, + { + "epoch": 81.56, + "learning_rate": 3.6883703424915323e-06, + "loss": 0.0896, + "step": 216700 + }, + { + "epoch": 81.6, + "learning_rate": 3.6808430560782844e-06, + "loss": 0.0909, + "step": 216800 + }, + { + "epoch": 81.63, + "learning_rate": 3.673315769665036e-06, + "loss": 0.0912, + "step": 216900 + }, + { + "epoch": 81.67, + "learning_rate": 3.665788483251788e-06, + "loss": 0.0915, + "step": 217000 + }, + { + "epoch": 81.71, + "learning_rate": 3.65826119683854e-06, + "loss": 0.091, + "step": 217100 + }, + { + "epoch": 81.75, + "learning_rate": 3.650733910425292e-06, + "loss": 0.09, + "step": 217200 + }, + { + "epoch": 81.78, + "learning_rate": 3.643206624012044e-06, + "loss": 0.0912, + "step": 217300 + }, + { + "epoch": 81.82, + "learning_rate": 3.635679337598796e-06, + "loss": 0.0908, + "step": 217400 + }, + { + "epoch": 81.86, + "learning_rate": 3.6281520511855477e-06, + "loss": 0.0902, + "step": 217500 + }, + { + "epoch": 81.9, + "learning_rate": 3.6206247647723e-06, + "loss": 0.0907, + "step": 217600 + }, + { + "epoch": 81.93, + "learning_rate": 3.6130974783590515e-06, + "loss": 0.0892, + "step": 217700 + }, + { + "epoch": 81.97, + "learning_rate": 3.6055701919458036e-06, + "loss": 0.0916, + "step": 217800 + }, + { + "epoch": 82.0, + "eval_loss": 0.09082730859518051, + "eval_runtime": 45.1546, + "eval_samples_per_second": 166.096, + "eval_steps_per_second": 10.387, + "step": 217874 + }, + { + "epoch": 82.01, + "learning_rate": 3.598042905532556e-06, + "loss": 0.0915, + "step": 217900 + }, + { + "epoch": 82.05, + "learning_rate": 3.590515619119308e-06, + "loss": 0.091, + "step": 218000 + }, + { + "epoch": 82.09, + "learning_rate": 3.58298833270606e-06, + "loss": 0.0918, + "step": 218100 + }, + { + "epoch": 82.12, + "learning_rate": 3.575461046292812e-06, + "loss": 0.0911, + "step": 218200 + }, + { + "epoch": 82.16, + "learning_rate": 3.5679337598795636e-06, + "loss": 0.0907, + "step": 218300 + }, + { + "epoch": 82.2, + "learning_rate": 3.5604064734663157e-06, + "loss": 0.0912, + "step": 218400 + }, + { + "epoch": 82.24, + "learning_rate": 3.5528791870530678e-06, + "loss": 0.0915, + "step": 218500 + }, + { + "epoch": 82.27, + "learning_rate": 3.5453519006398194e-06, + "loss": 0.0909, + "step": 218600 + }, + { + "epoch": 82.31, + "learning_rate": 3.5378246142265715e-06, + "loss": 0.0915, + "step": 218700 + }, + { + "epoch": 82.35, + "learning_rate": 3.5302973278133236e-06, + "loss": 0.0895, + "step": 218800 + }, + { + "epoch": 82.39, + "learning_rate": 3.5227700414000753e-06, + "loss": 0.0909, + "step": 218900 + }, + { + "epoch": 82.42, + "learning_rate": 3.5152427549868274e-06, + "loss": 0.0922, + "step": 219000 + }, + { + "epoch": 82.46, + "learning_rate": 3.507715468573579e-06, + "loss": 0.0923, + "step": 219100 + }, + { + "epoch": 82.5, + "learning_rate": 3.5001881821603316e-06, + "loss": 0.0923, + "step": 219200 + }, + { + "epoch": 82.54, + "learning_rate": 3.4926608957470836e-06, + "loss": 0.0904, + "step": 219300 + }, + { + "epoch": 82.57, + "learning_rate": 3.4851336093338357e-06, + "loss": 0.0904, + "step": 219400 + }, + { + "epoch": 82.61, + "learning_rate": 3.4776063229205874e-06, + "loss": 0.0921, + "step": 219500 + }, + { + "epoch": 82.65, + "learning_rate": 3.4700790365073395e-06, + "loss": 0.0904, + "step": 219600 + }, + { + "epoch": 82.69, + "learning_rate": 3.4625517500940916e-06, + "loss": 0.0903, + "step": 219700 + }, + { + "epoch": 82.72, + "learning_rate": 3.4550244636808433e-06, + "loss": 0.0911, + "step": 219800 + }, + { + "epoch": 82.76, + "learning_rate": 3.4474971772675953e-06, + "loss": 0.0904, + "step": 219900 + }, + { + "epoch": 82.8, + "learning_rate": 3.439969890854347e-06, + "loss": 0.0898, + "step": 220000 + }, + { + "epoch": 82.84, + "learning_rate": 3.432442604441099e-06, + "loss": 0.0906, + "step": 220100 + }, + { + "epoch": 82.88, + "learning_rate": 3.424915318027851e-06, + "loss": 0.0885, + "step": 220200 + }, + { + "epoch": 82.91, + "learning_rate": 3.417388031614603e-06, + "loss": 0.0917, + "step": 220300 + }, + { + "epoch": 82.95, + "learning_rate": 3.4098607452013554e-06, + "loss": 0.0888, + "step": 220400 + }, + { + "epoch": 82.99, + "learning_rate": 3.4023334587881075e-06, + "loss": 0.0902, + "step": 220500 + }, + { + "epoch": 83.0, + "eval_loss": 0.09073475003242493, + "eval_runtime": 44.912, + "eval_samples_per_second": 166.993, + "eval_steps_per_second": 10.443, + "step": 220531 + }, + { + "epoch": 83.03, + "learning_rate": 3.394806172374859e-06, + "loss": 0.0911, + "step": 220600 + }, + { + "epoch": 83.06, + "learning_rate": 3.3872788859616112e-06, + "loss": 0.0922, + "step": 220700 + }, + { + "epoch": 83.1, + "learning_rate": 3.3797515995483633e-06, + "loss": 0.0911, + "step": 220800 + }, + { + "epoch": 83.14, + "learning_rate": 3.372224313135115e-06, + "loss": 0.0914, + "step": 220900 + }, + { + "epoch": 83.18, + "learning_rate": 3.364697026721867e-06, + "loss": 0.0912, + "step": 221000 + }, + { + "epoch": 83.21, + "learning_rate": 3.357169740308619e-06, + "loss": 0.091, + "step": 221100 + }, + { + "epoch": 83.25, + "learning_rate": 3.349642453895371e-06, + "loss": 0.0911, + "step": 221200 + }, + { + "epoch": 83.29, + "learning_rate": 3.342115167482123e-06, + "loss": 0.0896, + "step": 221300 + }, + { + "epoch": 83.33, + "learning_rate": 3.3345878810688746e-06, + "loss": 0.0926, + "step": 221400 + }, + { + "epoch": 83.36, + "learning_rate": 3.3270605946556267e-06, + "loss": 0.0896, + "step": 221500 + }, + { + "epoch": 83.4, + "learning_rate": 3.3195333082423788e-06, + "loss": 0.0918, + "step": 221600 + }, + { + "epoch": 83.44, + "learning_rate": 3.3120060218291313e-06, + "loss": 0.0904, + "step": 221700 + }, + { + "epoch": 83.48, + "learning_rate": 3.304478735415883e-06, + "loss": 0.0924, + "step": 221800 + }, + { + "epoch": 83.52, + "learning_rate": 3.296951449002635e-06, + "loss": 0.0898, + "step": 221900 + }, + { + "epoch": 83.55, + "learning_rate": 3.289424162589387e-06, + "loss": 0.0919, + "step": 222000 + }, + { + "epoch": 83.59, + "learning_rate": 3.2818968761761388e-06, + "loss": 0.0902, + "step": 222100 + }, + { + "epoch": 83.63, + "learning_rate": 3.274369589762891e-06, + "loss": 0.0911, + "step": 222200 + }, + { + "epoch": 83.67, + "learning_rate": 3.2668423033496425e-06, + "loss": 0.0907, + "step": 222300 + }, + { + "epoch": 83.7, + "learning_rate": 3.2593150169363946e-06, + "loss": 0.0907, + "step": 222400 + }, + { + "epoch": 83.74, + "learning_rate": 3.2517877305231467e-06, + "loss": 0.0894, + "step": 222500 + }, + { + "epoch": 83.78, + "learning_rate": 3.2442604441098984e-06, + "loss": 0.0912, + "step": 222600 + }, + { + "epoch": 83.82, + "learning_rate": 3.2367331576966505e-06, + "loss": 0.0919, + "step": 222700 + }, + { + "epoch": 83.85, + "learning_rate": 3.2292058712834026e-06, + "loss": 0.0906, + "step": 222800 + }, + { + "epoch": 83.89, + "learning_rate": 3.2216785848701546e-06, + "loss": 0.0902, + "step": 222900 + }, + { + "epoch": 83.93, + "learning_rate": 3.2141512984569067e-06, + "loss": 0.0908, + "step": 223000 + }, + { + "epoch": 83.97, + "learning_rate": 3.206624012043659e-06, + "loss": 0.0911, + "step": 223100 + }, + { + "epoch": 84.0, + "eval_loss": 0.09099774062633514, + "eval_runtime": 45.2441, + "eval_samples_per_second": 165.768, + "eval_steps_per_second": 10.366, + "step": 223188 + }, + { + "epoch": 84.0, + "learning_rate": 3.1990967256304105e-06, + "loss": 0.0915, + "step": 223200 + }, + { + "epoch": 84.04, + "learning_rate": 3.1915694392171626e-06, + "loss": 0.0904, + "step": 223300 + }, + { + "epoch": 84.08, + "learning_rate": 3.1840421528039147e-06, + "loss": 0.0894, + "step": 223400 + }, + { + "epoch": 84.12, + "learning_rate": 3.1765148663906663e-06, + "loss": 0.0901, + "step": 223500 + }, + { + "epoch": 84.16, + "learning_rate": 3.1689875799774184e-06, + "loss": 0.0901, + "step": 223600 + }, + { + "epoch": 84.19, + "learning_rate": 3.16146029356417e-06, + "loss": 0.091, + "step": 223700 + }, + { + "epoch": 84.23, + "learning_rate": 3.153933007150922e-06, + "loss": 0.0913, + "step": 223800 + }, + { + "epoch": 84.27, + "learning_rate": 3.1464057207376743e-06, + "loss": 0.091, + "step": 223900 + }, + { + "epoch": 84.31, + "learning_rate": 3.138878434324426e-06, + "loss": 0.0907, + "step": 224000 + }, + { + "epoch": 84.34, + "learning_rate": 3.131351147911178e-06, + "loss": 0.0913, + "step": 224100 + }, + { + "epoch": 84.38, + "learning_rate": 3.1238238614979305e-06, + "loss": 0.0898, + "step": 224200 + }, + { + "epoch": 84.42, + "learning_rate": 3.1162965750846822e-06, + "loss": 0.0897, + "step": 224300 + }, + { + "epoch": 84.46, + "learning_rate": 3.1087692886714343e-06, + "loss": 0.0897, + "step": 224400 + }, + { + "epoch": 84.49, + "learning_rate": 3.1012420022581864e-06, + "loss": 0.0915, + "step": 224500 + }, + { + "epoch": 84.53, + "learning_rate": 3.093714715844938e-06, + "loss": 0.0916, + "step": 224600 + }, + { + "epoch": 84.57, + "learning_rate": 3.08618742943169e-06, + "loss": 0.0932, + "step": 224700 + }, + { + "epoch": 84.61, + "learning_rate": 3.0786601430184422e-06, + "loss": 0.0909, + "step": 224800 + }, + { + "epoch": 84.64, + "learning_rate": 3.071132856605194e-06, + "loss": 0.0897, + "step": 224900 + }, + { + "epoch": 84.68, + "learning_rate": 3.063605570191946e-06, + "loss": 0.0914, + "step": 225000 + }, + { + "epoch": 84.72, + "learning_rate": 3.0560782837786977e-06, + "loss": 0.0891, + "step": 225100 + }, + { + "epoch": 84.76, + "learning_rate": 3.0485509973654498e-06, + "loss": 0.0896, + "step": 225200 + }, + { + "epoch": 84.79, + "learning_rate": 3.041023710952202e-06, + "loss": 0.0911, + "step": 225300 + }, + { + "epoch": 84.83, + "learning_rate": 3.0334964245389544e-06, + "loss": 0.0931, + "step": 225400 + }, + { + "epoch": 84.87, + "learning_rate": 3.025969138125706e-06, + "loss": 0.0922, + "step": 225500 + }, + { + "epoch": 84.91, + "learning_rate": 3.018441851712458e-06, + "loss": 0.0914, + "step": 225600 + }, + { + "epoch": 84.95, + "learning_rate": 3.01091456529921e-06, + "loss": 0.0914, + "step": 225700 + }, + { + "epoch": 84.98, + "learning_rate": 3.003387278885962e-06, + "loss": 0.091, + "step": 225800 + }, + { + "epoch": 85.0, + "eval_loss": 0.0903320163488388, + "eval_runtime": 45.2024, + "eval_samples_per_second": 165.921, + "eval_steps_per_second": 10.376, + "step": 225845 + }, + { + "epoch": 85.02, + "learning_rate": 2.995859992472714e-06, + "loss": 0.0899, + "step": 225900 + }, + { + "epoch": 85.06, + "learning_rate": 2.9883327060594656e-06, + "loss": 0.0913, + "step": 226000 + }, + { + "epoch": 85.1, + "learning_rate": 2.9808054196462177e-06, + "loss": 0.091, + "step": 226100 + }, + { + "epoch": 85.13, + "learning_rate": 2.97327813323297e-06, + "loss": 0.0931, + "step": 226200 + }, + { + "epoch": 85.17, + "learning_rate": 2.9657508468197215e-06, + "loss": 0.0925, + "step": 226300 + }, + { + "epoch": 85.21, + "learning_rate": 2.9582235604064736e-06, + "loss": 0.0903, + "step": 226400 + }, + { + "epoch": 85.25, + "learning_rate": 2.9506962739932257e-06, + "loss": 0.0908, + "step": 226500 + }, + { + "epoch": 85.28, + "learning_rate": 2.9431689875799777e-06, + "loss": 0.091, + "step": 226600 + }, + { + "epoch": 85.32, + "learning_rate": 2.93564170116673e-06, + "loss": 0.0901, + "step": 226700 + }, + { + "epoch": 85.36, + "learning_rate": 2.928114414753482e-06, + "loss": 0.0907, + "step": 226800 + }, + { + "epoch": 85.4, + "learning_rate": 2.9205871283402336e-06, + "loss": 0.0912, + "step": 226900 + }, + { + "epoch": 85.43, + "learning_rate": 2.9130598419269857e-06, + "loss": 0.0904, + "step": 227000 + }, + { + "epoch": 85.47, + "learning_rate": 2.9055325555137378e-06, + "loss": 0.0913, + "step": 227100 + }, + { + "epoch": 85.51, + "learning_rate": 2.8980052691004894e-06, + "loss": 0.0917, + "step": 227200 + }, + { + "epoch": 85.55, + "learning_rate": 2.8904779826872415e-06, + "loss": 0.0897, + "step": 227300 + }, + { + "epoch": 85.59, + "learning_rate": 2.882950696273993e-06, + "loss": 0.09, + "step": 227400 + }, + { + "epoch": 85.62, + "learning_rate": 2.8754234098607453e-06, + "loss": 0.0913, + "step": 227500 + }, + { + "epoch": 85.66, + "learning_rate": 2.8678961234474974e-06, + "loss": 0.0899, + "step": 227600 + }, + { + "epoch": 85.7, + "learning_rate": 2.860368837034249e-06, + "loss": 0.0905, + "step": 227700 + }, + { + "epoch": 85.74, + "learning_rate": 2.852841550621001e-06, + "loss": 0.0898, + "step": 227800 + }, + { + "epoch": 85.77, + "learning_rate": 2.8453142642077536e-06, + "loss": 0.09, + "step": 227900 + }, + { + "epoch": 85.81, + "learning_rate": 2.8377869777945053e-06, + "loss": 0.0908, + "step": 228000 + }, + { + "epoch": 85.85, + "learning_rate": 2.8302596913812574e-06, + "loss": 0.0906, + "step": 228100 + }, + { + "epoch": 85.89, + "learning_rate": 2.8227324049680095e-06, + "loss": 0.0904, + "step": 228200 + }, + { + "epoch": 85.92, + "learning_rate": 2.815205118554761e-06, + "loss": 0.0897, + "step": 228300 + }, + { + "epoch": 85.96, + "learning_rate": 2.8076778321415132e-06, + "loss": 0.091, + "step": 228400 + }, + { + "epoch": 86.0, + "learning_rate": 2.8001505457282653e-06, + "loss": 0.0903, + "step": 228500 + }, + { + "epoch": 86.0, + "eval_loss": 0.0905364602804184, + "eval_runtime": 45.2173, + "eval_samples_per_second": 165.866, + "eval_steps_per_second": 10.372, + "step": 228502 + }, + { + "epoch": 86.04, + "learning_rate": 2.792623259315017e-06, + "loss": 0.0907, + "step": 228600 + }, + { + "epoch": 86.07, + "learning_rate": 2.785095972901769e-06, + "loss": 0.0918, + "step": 228700 + }, + { + "epoch": 86.11, + "learning_rate": 2.7775686864885208e-06, + "loss": 0.0925, + "step": 228800 + }, + { + "epoch": 86.15, + "learning_rate": 2.770041400075273e-06, + "loss": 0.0897, + "step": 228900 + }, + { + "epoch": 86.19, + "learning_rate": 2.762514113662025e-06, + "loss": 0.0895, + "step": 229000 + }, + { + "epoch": 86.23, + "learning_rate": 2.7549868272487774e-06, + "loss": 0.088, + "step": 229100 + }, + { + "epoch": 86.26, + "learning_rate": 2.747459540835529e-06, + "loss": 0.0899, + "step": 229200 + }, + { + "epoch": 86.3, + "learning_rate": 2.739932254422281e-06, + "loss": 0.0898, + "step": 229300 + }, + { + "epoch": 86.34, + "learning_rate": 2.7324049680090333e-06, + "loss": 0.0904, + "step": 229400 + }, + { + "epoch": 86.38, + "learning_rate": 2.724877681595785e-06, + "loss": 0.0904, + "step": 229500 + }, + { + "epoch": 86.41, + "learning_rate": 2.717350395182537e-06, + "loss": 0.0906, + "step": 229600 + }, + { + "epoch": 86.45, + "learning_rate": 2.7098231087692887e-06, + "loss": 0.0899, + "step": 229700 + }, + { + "epoch": 86.49, + "learning_rate": 2.702295822356041e-06, + "loss": 0.0889, + "step": 229800 + }, + { + "epoch": 86.53, + "learning_rate": 2.694768535942793e-06, + "loss": 0.0915, + "step": 229900 + }, + { + "epoch": 86.56, + "learning_rate": 2.6872412495295446e-06, + "loss": 0.09, + "step": 230000 + }, + { + "epoch": 86.6, + "learning_rate": 2.6797139631162967e-06, + "loss": 0.0911, + "step": 230100 + }, + { + "epoch": 86.64, + "learning_rate": 2.6721866767030487e-06, + "loss": 0.0911, + "step": 230200 + }, + { + "epoch": 86.68, + "learning_rate": 2.6646593902898004e-06, + "loss": 0.0914, + "step": 230300 + }, + { + "epoch": 86.71, + "learning_rate": 2.657132103876553e-06, + "loss": 0.0904, + "step": 230400 + }, + { + "epoch": 86.75, + "learning_rate": 2.649604817463305e-06, + "loss": 0.0892, + "step": 230500 + }, + { + "epoch": 86.79, + "learning_rate": 2.6420775310500567e-06, + "loss": 0.0896, + "step": 230600 + }, + { + "epoch": 86.83, + "learning_rate": 2.6345502446368088e-06, + "loss": 0.0902, + "step": 230700 + }, + { + "epoch": 86.86, + "learning_rate": 2.627022958223561e-06, + "loss": 0.0907, + "step": 230800 + }, + { + "epoch": 86.9, + "learning_rate": 2.6194956718103125e-06, + "loss": 0.0904, + "step": 230900 + }, + { + "epoch": 86.94, + "learning_rate": 2.6119683853970646e-06, + "loss": 0.0906, + "step": 231000 + }, + { + "epoch": 86.98, + "learning_rate": 2.6044410989838163e-06, + "loss": 0.0907, + "step": 231100 + }, + { + "epoch": 87.0, + "eval_loss": 0.09008638560771942, + "eval_runtime": 45.2794, + "eval_samples_per_second": 165.638, + "eval_steps_per_second": 10.358, + "step": 231159 + }, + { + "epoch": 87.02, + "learning_rate": 2.5969138125705684e-06, + "loss": 0.0909, + "step": 231200 + }, + { + "epoch": 87.05, + "learning_rate": 2.5893865261573205e-06, + "loss": 0.0889, + "step": 231300 + }, + { + "epoch": 87.09, + "learning_rate": 2.581859239744072e-06, + "loss": 0.0905, + "step": 231400 + }, + { + "epoch": 87.13, + "learning_rate": 2.5743319533308242e-06, + "loss": 0.0887, + "step": 231500 + }, + { + "epoch": 87.17, + "learning_rate": 2.5668046669175767e-06, + "loss": 0.0914, + "step": 231600 + }, + { + "epoch": 87.2, + "learning_rate": 2.559277380504329e-06, + "loss": 0.0906, + "step": 231700 + }, + { + "epoch": 87.24, + "learning_rate": 2.5517500940910805e-06, + "loss": 0.0909, + "step": 231800 + }, + { + "epoch": 87.28, + "learning_rate": 2.5442228076778326e-06, + "loss": 0.09, + "step": 231900 + }, + { + "epoch": 87.32, + "learning_rate": 2.5366955212645842e-06, + "loss": 0.0888, + "step": 232000 + }, + { + "epoch": 87.35, + "learning_rate": 2.5291682348513363e-06, + "loss": 0.09, + "step": 232100 + }, + { + "epoch": 87.39, + "learning_rate": 2.5216409484380884e-06, + "loss": 0.0895, + "step": 232200 + }, + { + "epoch": 87.43, + "learning_rate": 2.51411366202484e-06, + "loss": 0.0907, + "step": 232300 + }, + { + "epoch": 87.47, + "learning_rate": 2.506586375611592e-06, + "loss": 0.0906, + "step": 232400 + }, + { + "epoch": 87.5, + "learning_rate": 2.4990590891983443e-06, + "loss": 0.0907, + "step": 232500 + }, + { + "epoch": 87.54, + "learning_rate": 2.4915318027850964e-06, + "loss": 0.0907, + "step": 232600 + }, + { + "epoch": 87.58, + "learning_rate": 2.484004516371848e-06, + "loss": 0.0907, + "step": 232700 + }, + { + "epoch": 87.62, + "learning_rate": 2.4764772299586e-06, + "loss": 0.0892, + "step": 232800 + }, + { + "epoch": 87.66, + "learning_rate": 2.468949943545352e-06, + "loss": 0.0908, + "step": 232900 + }, + { + "epoch": 87.69, + "learning_rate": 2.461422657132104e-06, + "loss": 0.0906, + "step": 233000 + }, + { + "epoch": 87.73, + "learning_rate": 2.453895370718856e-06, + "loss": 0.0907, + "step": 233100 + }, + { + "epoch": 87.77, + "learning_rate": 2.446368084305608e-06, + "loss": 0.0905, + "step": 233200 + }, + { + "epoch": 87.81, + "learning_rate": 2.43884079789236e-06, + "loss": 0.0913, + "step": 233300 + }, + { + "epoch": 87.84, + "learning_rate": 2.431313511479112e-06, + "loss": 0.0926, + "step": 233400 + }, + { + "epoch": 87.88, + "learning_rate": 2.423786225065864e-06, + "loss": 0.0924, + "step": 233500 + }, + { + "epoch": 87.92, + "learning_rate": 2.416258938652616e-06, + "loss": 0.0897, + "step": 233600 + }, + { + "epoch": 87.96, + "learning_rate": 2.4087316522393677e-06, + "loss": 0.0922, + "step": 233700 + }, + { + "epoch": 87.99, + "learning_rate": 2.40120436582612e-06, + "loss": 0.0908, + "step": 233800 + }, + { + "epoch": 88.0, + "eval_loss": 0.0906805768609047, + "eval_runtime": 44.9216, + "eval_samples_per_second": 166.958, + "eval_steps_per_second": 10.44, + "step": 233816 + }, + { + "epoch": 88.03, + "learning_rate": 2.393677079412872e-06, + "loss": 0.0916, + "step": 233900 + }, + { + "epoch": 88.07, + "learning_rate": 2.386149792999624e-06, + "loss": 0.0901, + "step": 234000 + }, + { + "epoch": 88.11, + "learning_rate": 2.3786225065863756e-06, + "loss": 0.0899, + "step": 234100 + }, + { + "epoch": 88.14, + "learning_rate": 2.3710952201731277e-06, + "loss": 0.0909, + "step": 234200 + }, + { + "epoch": 88.18, + "learning_rate": 2.3635679337598798e-06, + "loss": 0.0904, + "step": 234300 + }, + { + "epoch": 88.22, + "learning_rate": 2.356040647346632e-06, + "loss": 0.0914, + "step": 234400 + }, + { + "epoch": 88.26, + "learning_rate": 2.348513360933384e-06, + "loss": 0.091, + "step": 234500 + }, + { + "epoch": 88.3, + "learning_rate": 2.3409860745201356e-06, + "loss": 0.0919, + "step": 234600 + }, + { + "epoch": 88.33, + "learning_rate": 2.3334587881068877e-06, + "loss": 0.091, + "step": 234700 + }, + { + "epoch": 88.37, + "learning_rate": 2.3259315016936394e-06, + "loss": 0.0899, + "step": 234800 + }, + { + "epoch": 88.41, + "learning_rate": 2.3184042152803915e-06, + "loss": 0.0899, + "step": 234900 + }, + { + "epoch": 88.45, + "learning_rate": 2.3108769288671436e-06, + "loss": 0.0899, + "step": 235000 + }, + { + "epoch": 88.48, + "learning_rate": 2.3033496424538956e-06, + "loss": 0.0916, + "step": 235100 + }, + { + "epoch": 88.52, + "learning_rate": 2.2958223560406477e-06, + "loss": 0.0901, + "step": 235200 + }, + { + "epoch": 88.56, + "learning_rate": 2.2882950696273994e-06, + "loss": 0.0898, + "step": 235300 + }, + { + "epoch": 88.6, + "learning_rate": 2.2807677832141515e-06, + "loss": 0.0908, + "step": 235400 + }, + { + "epoch": 88.63, + "learning_rate": 2.273240496800903e-06, + "loss": 0.0891, + "step": 235500 + }, + { + "epoch": 88.67, + "learning_rate": 2.2657132103876557e-06, + "loss": 0.0909, + "step": 235600 + }, + { + "epoch": 88.71, + "learning_rate": 2.2581859239744073e-06, + "loss": 0.0889, + "step": 235700 + }, + { + "epoch": 88.75, + "learning_rate": 2.2506586375611594e-06, + "loss": 0.093, + "step": 235800 + }, + { + "epoch": 88.78, + "learning_rate": 2.2431313511479115e-06, + "loss": 0.0902, + "step": 235900 + }, + { + "epoch": 88.82, + "learning_rate": 2.235604064734663e-06, + "loss": 0.09, + "step": 236000 + }, + { + "epoch": 88.86, + "learning_rate": 2.2280767783214153e-06, + "loss": 0.0893, + "step": 236100 + }, + { + "epoch": 88.9, + "learning_rate": 2.2205494919081674e-06, + "loss": 0.0902, + "step": 236200 + }, + { + "epoch": 88.93, + "learning_rate": 2.2130222054949194e-06, + "loss": 0.0912, + "step": 236300 + }, + { + "epoch": 88.97, + "learning_rate": 2.205494919081671e-06, + "loss": 0.0911, + "step": 236400 + }, + { + "epoch": 89.0, + "eval_loss": 0.09018085896968842, + "eval_runtime": 45.1243, + "eval_samples_per_second": 166.207, + "eval_steps_per_second": 10.394, + "step": 236473 + }, + { + "epoch": 89.01, + "learning_rate": 2.197967632668423e-06, + "loss": 0.092, + "step": 236500 + }, + { + "epoch": 89.05, + "learning_rate": 2.1904403462551753e-06, + "loss": 0.0904, + "step": 236600 + }, + { + "epoch": 89.09, + "learning_rate": 2.182913059841927e-06, + "loss": 0.09, + "step": 236700 + }, + { + "epoch": 89.12, + "learning_rate": 2.175385773428679e-06, + "loss": 0.0911, + "step": 236800 + }, + { + "epoch": 89.16, + "learning_rate": 2.167858487015431e-06, + "loss": 0.0883, + "step": 236900 + }, + { + "epoch": 89.2, + "learning_rate": 2.1603312006021832e-06, + "loss": 0.0904, + "step": 237000 + }, + { + "epoch": 89.24, + "learning_rate": 2.152803914188935e-06, + "loss": 0.0912, + "step": 237100 + }, + { + "epoch": 89.27, + "learning_rate": 2.145276627775687e-06, + "loss": 0.0892, + "step": 237200 + }, + { + "epoch": 89.31, + "learning_rate": 2.137749341362439e-06, + "loss": 0.0912, + "step": 237300 + }, + { + "epoch": 89.35, + "learning_rate": 2.1302220549491907e-06, + "loss": 0.09, + "step": 237400 + }, + { + "epoch": 89.39, + "learning_rate": 2.1226947685359433e-06, + "loss": 0.0904, + "step": 237500 + }, + { + "epoch": 89.42, + "learning_rate": 2.115167482122695e-06, + "loss": 0.0913, + "step": 237600 + }, + { + "epoch": 89.46, + "learning_rate": 2.107640195709447e-06, + "loss": 0.0904, + "step": 237700 + }, + { + "epoch": 89.5, + "learning_rate": 2.1001129092961987e-06, + "loss": 0.0899, + "step": 237800 + }, + { + "epoch": 89.54, + "learning_rate": 2.0925856228829508e-06, + "loss": 0.0893, + "step": 237900 + }, + { + "epoch": 89.57, + "learning_rate": 2.085058336469703e-06, + "loss": 0.0903, + "step": 238000 + }, + { + "epoch": 89.61, + "learning_rate": 2.077531050056455e-06, + "loss": 0.09, + "step": 238100 + }, + { + "epoch": 89.65, + "learning_rate": 2.070003763643207e-06, + "loss": 0.0903, + "step": 238200 + }, + { + "epoch": 89.69, + "learning_rate": 2.0624764772299587e-06, + "loss": 0.0891, + "step": 238300 + }, + { + "epoch": 89.73, + "learning_rate": 2.054949190816711e-06, + "loss": 0.0892, + "step": 238400 + }, + { + "epoch": 89.76, + "learning_rate": 2.0474219044034625e-06, + "loss": 0.09, + "step": 238500 + }, + { + "epoch": 89.8, + "learning_rate": 2.0398946179902146e-06, + "loss": 0.0908, + "step": 238600 + }, + { + "epoch": 89.84, + "learning_rate": 2.0323673315769666e-06, + "loss": 0.0902, + "step": 238700 + }, + { + "epoch": 89.88, + "learning_rate": 2.0248400451637187e-06, + "loss": 0.0916, + "step": 238800 + }, + { + "epoch": 89.91, + "learning_rate": 2.017312758750471e-06, + "loss": 0.0891, + "step": 238900 + }, + { + "epoch": 89.95, + "learning_rate": 2.0097854723372225e-06, + "loss": 0.0898, + "step": 239000 + }, + { + "epoch": 89.99, + "learning_rate": 2.0022581859239746e-06, + "loss": 0.0905, + "step": 239100 + }, + { + "epoch": 90.0, + "eval_loss": 0.09060540050268173, + "eval_runtime": 45.1371, + "eval_samples_per_second": 166.16, + "eval_steps_per_second": 10.391, + "step": 239130 + }, + { + "epoch": 90.03, + "learning_rate": 1.9947308995107267e-06, + "loss": 0.0915, + "step": 239200 + }, + { + "epoch": 90.06, + "learning_rate": 1.9872036130974783e-06, + "loss": 0.0896, + "step": 239300 + }, + { + "epoch": 90.1, + "learning_rate": 1.9796763266842304e-06, + "loss": 0.0899, + "step": 239400 + }, + { + "epoch": 90.14, + "learning_rate": 1.9721490402709825e-06, + "loss": 0.091, + "step": 239500 + }, + { + "epoch": 90.18, + "learning_rate": 1.9646217538577346e-06, + "loss": 0.0894, + "step": 239600 + }, + { + "epoch": 90.21, + "learning_rate": 1.9570944674444863e-06, + "loss": 0.0897, + "step": 239700 + }, + { + "epoch": 90.25, + "learning_rate": 1.9495671810312384e-06, + "loss": 0.0905, + "step": 239800 + }, + { + "epoch": 90.29, + "learning_rate": 1.9420398946179905e-06, + "loss": 0.0893, + "step": 239900 + }, + { + "epoch": 90.33, + "learning_rate": 1.9345126082047425e-06, + "loss": 0.0904, + "step": 240000 + }, + { + "epoch": 90.37, + "learning_rate": 1.926985321791494e-06, + "loss": 0.0908, + "step": 240100 + }, + { + "epoch": 90.4, + "learning_rate": 1.9194580353782463e-06, + "loss": 0.0892, + "step": 240200 + }, + { + "epoch": 90.44, + "learning_rate": 1.9119307489649984e-06, + "loss": 0.0906, + "step": 240300 + }, + { + "epoch": 90.48, + "learning_rate": 1.9044034625517503e-06, + "loss": 0.0907, + "step": 240400 + }, + { + "epoch": 90.52, + "learning_rate": 1.8968761761385021e-06, + "loss": 0.0917, + "step": 240500 + }, + { + "epoch": 90.55, + "learning_rate": 1.8893488897252542e-06, + "loss": 0.0902, + "step": 240600 + }, + { + "epoch": 90.59, + "learning_rate": 1.8818216033120063e-06, + "loss": 0.0894, + "step": 240700 + }, + { + "epoch": 90.63, + "learning_rate": 1.8742943168987582e-06, + "loss": 0.0887, + "step": 240800 + }, + { + "epoch": 90.67, + "learning_rate": 1.86676703048551e-06, + "loss": 0.0913, + "step": 240900 + }, + { + "epoch": 90.7, + "learning_rate": 1.859239744072262e-06, + "loss": 0.091, + "step": 241000 + }, + { + "epoch": 90.74, + "learning_rate": 1.851712457659014e-06, + "loss": 0.0892, + "step": 241100 + }, + { + "epoch": 90.78, + "learning_rate": 1.8441851712457661e-06, + "loss": 0.0898, + "step": 241200 + }, + { + "epoch": 90.82, + "learning_rate": 1.836657884832518e-06, + "loss": 0.0888, + "step": 241300 + }, + { + "epoch": 90.85, + "learning_rate": 1.82913059841927e-06, + "loss": 0.0895, + "step": 241400 + }, + { + "epoch": 90.89, + "learning_rate": 1.821603312006022e-06, + "loss": 0.0905, + "step": 241500 + }, + { + "epoch": 90.93, + "learning_rate": 1.8140760255927739e-06, + "loss": 0.0906, + "step": 241600 + }, + { + "epoch": 90.97, + "learning_rate": 1.8065487391795257e-06, + "loss": 0.089, + "step": 241700 + }, + { + "epoch": 91.0, + "eval_loss": 0.0901167169213295, + "eval_runtime": 44.9942, + "eval_samples_per_second": 166.688, + "eval_steps_per_second": 10.424, + "step": 241787 + }, + { + "epoch": 91.0, + "learning_rate": 1.799021452766278e-06, + "loss": 0.0896, + "step": 241800 + }, + { + "epoch": 91.04, + "learning_rate": 1.79149416635303e-06, + "loss": 0.0903, + "step": 241900 + }, + { + "epoch": 91.08, + "learning_rate": 1.7839668799397818e-06, + "loss": 0.0903, + "step": 242000 + }, + { + "epoch": 91.12, + "learning_rate": 1.7764395935265339e-06, + "loss": 0.0887, + "step": 242100 + }, + { + "epoch": 91.16, + "learning_rate": 1.7689123071132858e-06, + "loss": 0.0893, + "step": 242200 + }, + { + "epoch": 91.19, + "learning_rate": 1.7613850207000376e-06, + "loss": 0.0905, + "step": 242300 + }, + { + "epoch": 91.23, + "learning_rate": 1.7538577342867895e-06, + "loss": 0.0896, + "step": 242400 + }, + { + "epoch": 91.27, + "learning_rate": 1.7463304478735418e-06, + "loss": 0.0897, + "step": 242500 + }, + { + "epoch": 91.31, + "learning_rate": 1.7388031614602937e-06, + "loss": 0.091, + "step": 242600 + }, + { + "epoch": 91.34, + "learning_rate": 1.7312758750470458e-06, + "loss": 0.0905, + "step": 242700 + }, + { + "epoch": 91.38, + "learning_rate": 1.7237485886337977e-06, + "loss": 0.0887, + "step": 242800 + }, + { + "epoch": 91.42, + "learning_rate": 1.7162213022205495e-06, + "loss": 0.0896, + "step": 242900 + }, + { + "epoch": 91.46, + "learning_rate": 1.7086940158073014e-06, + "loss": 0.0905, + "step": 243000 + }, + { + "epoch": 91.49, + "learning_rate": 1.7011667293940537e-06, + "loss": 0.0874, + "step": 243100 + }, + { + "epoch": 91.53, + "learning_rate": 1.6936394429808056e-06, + "loss": 0.0896, + "step": 243200 + }, + { + "epoch": 91.57, + "learning_rate": 1.6861121565675575e-06, + "loss": 0.0888, + "step": 243300 + }, + { + "epoch": 91.61, + "learning_rate": 1.6785848701543096e-06, + "loss": 0.0891, + "step": 243400 + }, + { + "epoch": 91.64, + "learning_rate": 1.6710575837410615e-06, + "loss": 0.0905, + "step": 243500 + }, + { + "epoch": 91.68, + "learning_rate": 1.6635302973278133e-06, + "loss": 0.0915, + "step": 243600 + }, + { + "epoch": 91.72, + "learning_rate": 1.6560030109145656e-06, + "loss": 0.0912, + "step": 243700 + }, + { + "epoch": 91.76, + "learning_rate": 1.6484757245013175e-06, + "loss": 0.0892, + "step": 243800 + }, + { + "epoch": 91.8, + "learning_rate": 1.6409484380880694e-06, + "loss": 0.089, + "step": 243900 + }, + { + "epoch": 91.83, + "learning_rate": 1.6334211516748213e-06, + "loss": 0.0894, + "step": 244000 + }, + { + "epoch": 91.87, + "learning_rate": 1.6258938652615734e-06, + "loss": 0.0902, + "step": 244100 + }, + { + "epoch": 91.91, + "learning_rate": 1.6183665788483252e-06, + "loss": 0.0898, + "step": 244200 + }, + { + "epoch": 91.95, + "learning_rate": 1.6108392924350773e-06, + "loss": 0.0897, + "step": 244300 + }, + { + "epoch": 91.98, + "learning_rate": 1.6033120060218294e-06, + "loss": 0.0908, + "step": 244400 + }, + { + "epoch": 92.0, + "eval_loss": 0.08964475989341736, + "eval_runtime": 43.9599, + "eval_samples_per_second": 170.61, + "eval_steps_per_second": 10.669, + "step": 244444 + }, + { + "epoch": 92.02, + "learning_rate": 1.5957847196085813e-06, + "loss": 0.0888, + "step": 244500 + }, + { + "epoch": 92.06, + "learning_rate": 1.5882574331953332e-06, + "loss": 0.0873, + "step": 244600 + }, + { + "epoch": 92.1, + "learning_rate": 1.580730146782085e-06, + "loss": 0.091, + "step": 244700 + }, + { + "epoch": 92.13, + "learning_rate": 1.5732028603688371e-06, + "loss": 0.0898, + "step": 244800 + }, + { + "epoch": 92.17, + "learning_rate": 1.565675573955589e-06, + "loss": 0.0895, + "step": 244900 + }, + { + "epoch": 92.21, + "learning_rate": 1.5581482875423411e-06, + "loss": 0.0898, + "step": 245000 + }, + { + "epoch": 92.25, + "learning_rate": 1.5506210011290932e-06, + "loss": 0.0912, + "step": 245100 + }, + { + "epoch": 92.28, + "learning_rate": 1.543093714715845e-06, + "loss": 0.09, + "step": 245200 + }, + { + "epoch": 92.32, + "learning_rate": 1.535566428302597e-06, + "loss": 0.0899, + "step": 245300 + }, + { + "epoch": 92.36, + "learning_rate": 1.5280391418893488e-06, + "loss": 0.0904, + "step": 245400 + }, + { + "epoch": 92.4, + "learning_rate": 1.520511855476101e-06, + "loss": 0.0907, + "step": 245500 + }, + { + "epoch": 92.44, + "learning_rate": 1.512984569062853e-06, + "loss": 0.0901, + "step": 245600 + }, + { + "epoch": 92.47, + "learning_rate": 1.505457282649605e-06, + "loss": 0.0912, + "step": 245700 + }, + { + "epoch": 92.51, + "learning_rate": 1.497929996236357e-06, + "loss": 0.0901, + "step": 245800 + }, + { + "epoch": 92.55, + "learning_rate": 1.4904027098231089e-06, + "loss": 0.091, + "step": 245900 + }, + { + "epoch": 92.59, + "learning_rate": 1.4828754234098607e-06, + "loss": 0.0893, + "step": 246000 + }, + { + "epoch": 92.62, + "learning_rate": 1.4753481369966128e-06, + "loss": 0.0899, + "step": 246100 + }, + { + "epoch": 92.66, + "learning_rate": 1.467820850583365e-06, + "loss": 0.0898, + "step": 246200 + }, + { + "epoch": 92.7, + "learning_rate": 1.4602935641701168e-06, + "loss": 0.0908, + "step": 246300 + }, + { + "epoch": 92.74, + "learning_rate": 1.4527662777568689e-06, + "loss": 0.0909, + "step": 246400 + }, + { + "epoch": 92.77, + "learning_rate": 1.4452389913436208e-06, + "loss": 0.0887, + "step": 246500 + }, + { + "epoch": 92.81, + "learning_rate": 1.4377117049303726e-06, + "loss": 0.089, + "step": 246600 + }, + { + "epoch": 92.85, + "learning_rate": 1.4301844185171245e-06, + "loss": 0.0903, + "step": 246700 + }, + { + "epoch": 92.89, + "learning_rate": 1.4226571321038768e-06, + "loss": 0.0889, + "step": 246800 + }, + { + "epoch": 92.92, + "learning_rate": 1.4151298456906287e-06, + "loss": 0.0894, + "step": 246900 + }, + { + "epoch": 92.96, + "learning_rate": 1.4076025592773806e-06, + "loss": 0.0913, + "step": 247000 + }, + { + "epoch": 93.0, + "learning_rate": 1.4000752728641327e-06, + "loss": 0.0894, + "step": 247100 + }, + { + "epoch": 93.0, + "eval_loss": 0.08920498192310333, + "eval_runtime": 43.79, + "eval_samples_per_second": 171.272, + "eval_steps_per_second": 10.71, + "step": 247101 + }, + { + "epoch": 93.04, + "learning_rate": 1.3925479864508845e-06, + "loss": 0.0895, + "step": 247200 + }, + { + "epoch": 93.07, + "learning_rate": 1.3850207000376364e-06, + "loss": 0.0895, + "step": 247300 + }, + { + "epoch": 93.11, + "learning_rate": 1.3774934136243887e-06, + "loss": 0.0912, + "step": 247400 + }, + { + "epoch": 93.15, + "learning_rate": 1.3699661272111406e-06, + "loss": 0.0905, + "step": 247500 + }, + { + "epoch": 93.19, + "learning_rate": 1.3624388407978925e-06, + "loss": 0.0893, + "step": 247600 + }, + { + "epoch": 93.23, + "learning_rate": 1.3549115543846444e-06, + "loss": 0.0889, + "step": 247700 + }, + { + "epoch": 93.26, + "learning_rate": 1.3473842679713964e-06, + "loss": 0.0902, + "step": 247800 + }, + { + "epoch": 93.3, + "learning_rate": 1.3398569815581483e-06, + "loss": 0.0891, + "step": 247900 + }, + { + "epoch": 93.34, + "learning_rate": 1.3323296951449002e-06, + "loss": 0.0896, + "step": 248000 + }, + { + "epoch": 93.38, + "learning_rate": 1.3248024087316525e-06, + "loss": 0.0899, + "step": 248100 + }, + { + "epoch": 93.41, + "learning_rate": 1.3172751223184044e-06, + "loss": 0.0898, + "step": 248200 + }, + { + "epoch": 93.45, + "learning_rate": 1.3097478359051563e-06, + "loss": 0.0884, + "step": 248300 + }, + { + "epoch": 93.49, + "learning_rate": 1.3022205494919081e-06, + "loss": 0.0902, + "step": 248400 + }, + { + "epoch": 93.53, + "learning_rate": 1.2946932630786602e-06, + "loss": 0.0923, + "step": 248500 + }, + { + "epoch": 93.56, + "learning_rate": 1.2871659766654121e-06, + "loss": 0.0896, + "step": 248600 + }, + { + "epoch": 93.6, + "learning_rate": 1.2796386902521644e-06, + "loss": 0.0904, + "step": 248700 + }, + { + "epoch": 93.64, + "learning_rate": 1.2721114038389163e-06, + "loss": 0.0911, + "step": 248800 + }, + { + "epoch": 93.68, + "learning_rate": 1.2645841174256682e-06, + "loss": 0.0897, + "step": 248900 + }, + { + "epoch": 93.71, + "learning_rate": 1.25705683101242e-06, + "loss": 0.0898, + "step": 249000 + }, + { + "epoch": 93.75, + "learning_rate": 1.2495295445991721e-06, + "loss": 0.0889, + "step": 249100 + }, + { + "epoch": 93.79, + "learning_rate": 1.242002258185924e-06, + "loss": 0.0892, + "step": 249200 + }, + { + "epoch": 93.83, + "learning_rate": 1.234474971772676e-06, + "loss": 0.0881, + "step": 249300 + }, + { + "epoch": 93.87, + "learning_rate": 1.226947685359428e-06, + "loss": 0.0904, + "step": 249400 + }, + { + "epoch": 93.9, + "learning_rate": 1.21942039894618e-06, + "loss": 0.0894, + "step": 249500 + }, + { + "epoch": 93.94, + "learning_rate": 1.211893112532932e-06, + "loss": 0.0904, + "step": 249600 + }, + { + "epoch": 93.98, + "learning_rate": 1.2043658261196838e-06, + "loss": 0.0899, + "step": 249700 + }, + { + "epoch": 94.0, + "eval_loss": 0.08932201564311981, + "eval_runtime": 43.7672, + "eval_samples_per_second": 171.361, + "eval_steps_per_second": 10.716, + "step": 249758 + }, + { + "epoch": 94.02, + "learning_rate": 1.196838539706436e-06, + "loss": 0.0896, + "step": 249800 + }, + { + "epoch": 94.05, + "learning_rate": 1.1893112532931878e-06, + "loss": 0.0895, + "step": 249900 + }, + { + "epoch": 94.09, + "learning_rate": 1.1817839668799399e-06, + "loss": 0.0899, + "step": 250000 + }, + { + "epoch": 94.13, + "learning_rate": 1.174256680466692e-06, + "loss": 0.0902, + "step": 250100 + }, + { + "epoch": 94.17, + "learning_rate": 1.1667293940534439e-06, + "loss": 0.0901, + "step": 250200 + }, + { + "epoch": 94.2, + "learning_rate": 1.1592021076401957e-06, + "loss": 0.0885, + "step": 250300 + }, + { + "epoch": 94.24, + "learning_rate": 1.1516748212269478e-06, + "loss": 0.0906, + "step": 250400 + }, + { + "epoch": 94.28, + "learning_rate": 1.1441475348136997e-06, + "loss": 0.0899, + "step": 250500 + }, + { + "epoch": 94.32, + "learning_rate": 1.1366202484004516e-06, + "loss": 0.0894, + "step": 250600 + }, + { + "epoch": 94.35, + "learning_rate": 1.1290929619872037e-06, + "loss": 0.0892, + "step": 250700 + }, + { + "epoch": 94.39, + "learning_rate": 1.1215656755739558e-06, + "loss": 0.0912, + "step": 250800 + }, + { + "epoch": 94.43, + "learning_rate": 1.1140383891607076e-06, + "loss": 0.0871, + "step": 250900 + }, + { + "epoch": 94.47, + "learning_rate": 1.1065111027474597e-06, + "loss": 0.0898, + "step": 251000 + }, + { + "epoch": 94.51, + "learning_rate": 1.0989838163342116e-06, + "loss": 0.0896, + "step": 251100 + }, + { + "epoch": 94.54, + "learning_rate": 1.0914565299209635e-06, + "loss": 0.0914, + "step": 251200 + }, + { + "epoch": 94.58, + "learning_rate": 1.0839292435077156e-06, + "loss": 0.0907, + "step": 251300 + }, + { + "epoch": 94.62, + "learning_rate": 1.0764019570944674e-06, + "loss": 0.0896, + "step": 251400 + }, + { + "epoch": 94.66, + "learning_rate": 1.0688746706812195e-06, + "loss": 0.0888, + "step": 251500 + }, + { + "epoch": 94.69, + "learning_rate": 1.0613473842679716e-06, + "loss": 0.0879, + "step": 251600 + }, + { + "epoch": 94.73, + "learning_rate": 1.0538200978547235e-06, + "loss": 0.0899, + "step": 251700 + }, + { + "epoch": 94.77, + "learning_rate": 1.0462928114414754e-06, + "loss": 0.0903, + "step": 251800 + }, + { + "epoch": 94.81, + "learning_rate": 1.0387655250282275e-06, + "loss": 0.0878, + "step": 251900 + }, + { + "epoch": 94.84, + "learning_rate": 1.0312382386149794e-06, + "loss": 0.0894, + "step": 252000 + }, + { + "epoch": 94.88, + "learning_rate": 1.0237109522017312e-06, + "loss": 0.0901, + "step": 252100 + }, + { + "epoch": 94.92, + "learning_rate": 1.0161836657884833e-06, + "loss": 0.0906, + "step": 252200 + }, + { + "epoch": 94.96, + "learning_rate": 1.0086563793752354e-06, + "loss": 0.0911, + "step": 252300 + }, + { + "epoch": 94.99, + "learning_rate": 1.0011290929619873e-06, + "loss": 0.0899, + "step": 252400 + }, + { + "epoch": 95.0, + "eval_loss": 0.08966313302516937, + "eval_runtime": 43.6014, + "eval_samples_per_second": 172.013, + "eval_steps_per_second": 10.757, + "step": 252415 + }, + { + "epoch": 95.03, + "learning_rate": 9.936018065487392e-07, + "loss": 0.0903, + "step": 252500 + }, + { + "epoch": 95.07, + "learning_rate": 9.860745201354913e-07, + "loss": 0.0907, + "step": 252600 + }, + { + "epoch": 95.11, + "learning_rate": 9.785472337222431e-07, + "loss": 0.0909, + "step": 252700 + }, + { + "epoch": 95.14, + "learning_rate": 9.710199473089952e-07, + "loss": 0.0911, + "step": 252800 + }, + { + "epoch": 95.18, + "learning_rate": 9.63492660895747e-07, + "loss": 0.0916, + "step": 252900 + }, + { + "epoch": 95.22, + "learning_rate": 9.559653744824992e-07, + "loss": 0.0908, + "step": 253000 + }, + { + "epoch": 95.26, + "learning_rate": 9.484380880692511e-07, + "loss": 0.0907, + "step": 253100 + }, + { + "epoch": 95.3, + "learning_rate": 9.409108016560032e-07, + "loss": 0.0892, + "step": 253200 + }, + { + "epoch": 95.33, + "learning_rate": 9.33383515242755e-07, + "loss": 0.091, + "step": 253300 + }, + { + "epoch": 95.37, + "learning_rate": 9.25856228829507e-07, + "loss": 0.0888, + "step": 253400 + }, + { + "epoch": 95.41, + "learning_rate": 9.18328942416259e-07, + "loss": 0.0892, + "step": 253500 + }, + { + "epoch": 95.45, + "learning_rate": 9.10801656003011e-07, + "loss": 0.09, + "step": 253600 + }, + { + "epoch": 95.48, + "learning_rate": 9.032743695897629e-07, + "loss": 0.0896, + "step": 253700 + }, + { + "epoch": 95.52, + "learning_rate": 8.95747083176515e-07, + "loss": 0.0891, + "step": 253800 + }, + { + "epoch": 95.56, + "learning_rate": 8.882197967632669e-07, + "loss": 0.0897, + "step": 253900 + }, + { + "epoch": 95.6, + "learning_rate": 8.806925103500188e-07, + "loss": 0.0893, + "step": 254000 + }, + { + "epoch": 95.63, + "learning_rate": 8.731652239367709e-07, + "loss": 0.0901, + "step": 254100 + }, + { + "epoch": 95.67, + "learning_rate": 8.656379375235229e-07, + "loss": 0.0887, + "step": 254200 + }, + { + "epoch": 95.71, + "learning_rate": 8.581106511102748e-07, + "loss": 0.0886, + "step": 254300 + }, + { + "epoch": 95.75, + "learning_rate": 8.505833646970269e-07, + "loss": 0.0907, + "step": 254400 + }, + { + "epoch": 95.78, + "learning_rate": 8.430560782837787e-07, + "loss": 0.0894, + "step": 254500 + }, + { + "epoch": 95.82, + "learning_rate": 8.355287918705307e-07, + "loss": 0.0899, + "step": 254600 + }, + { + "epoch": 95.86, + "learning_rate": 8.280015054572828e-07, + "loss": 0.0881, + "step": 254700 + }, + { + "epoch": 95.9, + "learning_rate": 8.204742190440347e-07, + "loss": 0.0902, + "step": 254800 + }, + { + "epoch": 95.94, + "learning_rate": 8.129469326307867e-07, + "loss": 0.0895, + "step": 254900 + }, + { + "epoch": 95.97, + "learning_rate": 8.054196462175387e-07, + "loss": 0.0904, + "step": 255000 + }, + { + "epoch": 96.0, + "eval_loss": 0.0898142084479332, + "eval_runtime": 43.5802, + "eval_samples_per_second": 172.096, + "eval_steps_per_second": 10.762, + "step": 255072 + }, + { + "epoch": 96.01, + "learning_rate": 7.978923598042906e-07, + "loss": 0.0897, + "step": 255100 + }, + { + "epoch": 96.05, + "learning_rate": 7.903650733910425e-07, + "loss": 0.0908, + "step": 255200 + }, + { + "epoch": 96.09, + "learning_rate": 7.828377869777945e-07, + "loss": 0.0905, + "step": 255300 + }, + { + "epoch": 96.12, + "learning_rate": 7.753105005645466e-07, + "loss": 0.0892, + "step": 255400 + }, + { + "epoch": 96.16, + "learning_rate": 7.677832141512985e-07, + "loss": 0.0891, + "step": 255500 + }, + { + "epoch": 96.2, + "learning_rate": 7.602559277380505e-07, + "loss": 0.0898, + "step": 255600 + }, + { + "epoch": 96.24, + "learning_rate": 7.527286413248026e-07, + "loss": 0.0893, + "step": 255700 + }, + { + "epoch": 96.27, + "learning_rate": 7.452013549115544e-07, + "loss": 0.0896, + "step": 255800 + }, + { + "epoch": 96.31, + "learning_rate": 7.376740684983064e-07, + "loss": 0.0897, + "step": 255900 + }, + { + "epoch": 96.35, + "learning_rate": 7.301467820850584e-07, + "loss": 0.0903, + "step": 256000 + }, + { + "epoch": 96.39, + "learning_rate": 7.226194956718104e-07, + "loss": 0.0898, + "step": 256100 + }, + { + "epoch": 96.42, + "learning_rate": 7.150922092585623e-07, + "loss": 0.0895, + "step": 256200 + }, + { + "epoch": 96.46, + "learning_rate": 7.075649228453143e-07, + "loss": 0.0902, + "step": 256300 + }, + { + "epoch": 96.5, + "learning_rate": 7.000376364320663e-07, + "loss": 0.0889, + "step": 256400 + }, + { + "epoch": 96.54, + "learning_rate": 6.925103500188182e-07, + "loss": 0.0893, + "step": 256500 + }, + { + "epoch": 96.58, + "learning_rate": 6.849830636055703e-07, + "loss": 0.0901, + "step": 256600 + }, + { + "epoch": 96.61, + "learning_rate": 6.774557771923222e-07, + "loss": 0.09, + "step": 256700 + }, + { + "epoch": 96.65, + "learning_rate": 6.699284907790742e-07, + "loss": 0.0889, + "step": 256800 + }, + { + "epoch": 96.69, + "learning_rate": 6.624012043658263e-07, + "loss": 0.0887, + "step": 256900 + }, + { + "epoch": 96.73, + "learning_rate": 6.548739179525781e-07, + "loss": 0.089, + "step": 257000 + }, + { + "epoch": 96.76, + "learning_rate": 6.473466315393301e-07, + "loss": 0.0907, + "step": 257100 + }, + { + "epoch": 96.8, + "learning_rate": 6.398193451260822e-07, + "loss": 0.0888, + "step": 257200 + }, + { + "epoch": 96.84, + "learning_rate": 6.322920587128341e-07, + "loss": 0.0882, + "step": 257300 + }, + { + "epoch": 96.88, + "learning_rate": 6.247647722995861e-07, + "loss": 0.0897, + "step": 257400 + }, + { + "epoch": 96.91, + "learning_rate": 6.17237485886338e-07, + "loss": 0.0893, + "step": 257500 + }, + { + "epoch": 96.95, + "learning_rate": 6.0971019947309e-07, + "loss": 0.0892, + "step": 257600 + }, + { + "epoch": 96.99, + "learning_rate": 6.021829130598419e-07, + "loss": 0.0906, + "step": 257700 + }, + { + "epoch": 97.0, + "eval_loss": 0.08935380727052689, + "eval_runtime": 43.4106, + "eval_samples_per_second": 172.769, + "eval_steps_per_second": 10.804, + "step": 257729 + }, + { + "epoch": 97.03, + "learning_rate": 5.946556266465939e-07, + "loss": 0.0902, + "step": 257800 + }, + { + "epoch": 97.06, + "learning_rate": 5.87128340233346e-07, + "loss": 0.0891, + "step": 257900 + }, + { + "epoch": 97.1, + "learning_rate": 5.796010538200979e-07, + "loss": 0.088, + "step": 258000 + }, + { + "epoch": 97.14, + "learning_rate": 5.720737674068498e-07, + "loss": 0.0905, + "step": 258100 + }, + { + "epoch": 97.18, + "learning_rate": 5.645464809936018e-07, + "loss": 0.088, + "step": 258200 + }, + { + "epoch": 97.21, + "learning_rate": 5.570191945803538e-07, + "loss": 0.0886, + "step": 258300 + }, + { + "epoch": 97.25, + "learning_rate": 5.494919081671058e-07, + "loss": 0.0889, + "step": 258400 + }, + { + "epoch": 97.29, + "learning_rate": 5.419646217538578e-07, + "loss": 0.0877, + "step": 258500 + }, + { + "epoch": 97.33, + "learning_rate": 5.344373353406098e-07, + "loss": 0.0904, + "step": 258600 + }, + { + "epoch": 97.37, + "learning_rate": 5.269100489273618e-07, + "loss": 0.0886, + "step": 258700 + }, + { + "epoch": 97.4, + "learning_rate": 5.193827625141137e-07, + "loss": 0.0896, + "step": 258800 + }, + { + "epoch": 97.44, + "learning_rate": 5.118554761008656e-07, + "loss": 0.0884, + "step": 258900 + }, + { + "epoch": 97.48, + "learning_rate": 5.043281896876177e-07, + "loss": 0.0896, + "step": 259000 + }, + { + "epoch": 97.52, + "learning_rate": 4.968009032743696e-07, + "loss": 0.0886, + "step": 259100 + }, + { + "epoch": 97.55, + "learning_rate": 4.892736168611216e-07, + "loss": 0.0895, + "step": 259200 + }, + { + "epoch": 97.59, + "learning_rate": 4.817463304478736e-07, + "loss": 0.0889, + "step": 259300 + }, + { + "epoch": 97.63, + "learning_rate": 4.7421904403462554e-07, + "loss": 0.0888, + "step": 259400 + }, + { + "epoch": 97.67, + "learning_rate": 4.666917576213775e-07, + "loss": 0.0897, + "step": 259500 + }, + { + "epoch": 97.7, + "learning_rate": 4.591644712081295e-07, + "loss": 0.0903, + "step": 259600 + }, + { + "epoch": 97.74, + "learning_rate": 4.5163718479488144e-07, + "loss": 0.0883, + "step": 259700 + }, + { + "epoch": 97.78, + "learning_rate": 4.4410989838163347e-07, + "loss": 0.0902, + "step": 259800 + }, + { + "epoch": 97.82, + "learning_rate": 4.3658261196838546e-07, + "loss": 0.09, + "step": 259900 + }, + { + "epoch": 97.85, + "learning_rate": 4.290553255551374e-07, + "loss": 0.0883, + "step": 260000 + }, + { + "epoch": 97.89, + "learning_rate": 4.2152803914188937e-07, + "loss": 0.0904, + "step": 260100 + }, + { + "epoch": 97.93, + "learning_rate": 4.140007527286414e-07, + "loss": 0.0891, + "step": 260200 + }, + { + "epoch": 97.97, + "learning_rate": 4.0647346631539334e-07, + "loss": 0.0892, + "step": 260300 + }, + { + "epoch": 98.0, + "eval_loss": 0.08942902088165283, + "eval_runtime": 43.3793, + "eval_samples_per_second": 172.893, + "eval_steps_per_second": 10.812, + "step": 260386 + }, + { + "epoch": 98.01, + "learning_rate": 3.989461799021453e-07, + "loss": 0.0902, + "step": 260400 + }, + { + "epoch": 98.04, + "learning_rate": 3.9141889348889725e-07, + "loss": 0.0906, + "step": 260500 + }, + { + "epoch": 98.08, + "learning_rate": 3.8389160707564924e-07, + "loss": 0.0889, + "step": 260600 + }, + { + "epoch": 98.12, + "learning_rate": 3.763643206624013e-07, + "loss": 0.0907, + "step": 260700 + }, + { + "epoch": 98.16, + "learning_rate": 3.688370342491532e-07, + "loss": 0.0879, + "step": 260800 + }, + { + "epoch": 98.19, + "learning_rate": 3.613097478359052e-07, + "loss": 0.0877, + "step": 260900 + }, + { + "epoch": 98.23, + "learning_rate": 3.537824614226572e-07, + "loss": 0.0895, + "step": 261000 + }, + { + "epoch": 98.27, + "learning_rate": 3.462551750094091e-07, + "loss": 0.0903, + "step": 261100 + }, + { + "epoch": 98.31, + "learning_rate": 3.387278885961611e-07, + "loss": 0.0897, + "step": 261200 + }, + { + "epoch": 98.34, + "learning_rate": 3.312006021829131e-07, + "loss": 0.0886, + "step": 261300 + }, + { + "epoch": 98.38, + "learning_rate": 3.2367331576966506e-07, + "loss": 0.0882, + "step": 261400 + }, + { + "epoch": 98.42, + "learning_rate": 3.1614602935641704e-07, + "loss": 0.0894, + "step": 261500 + }, + { + "epoch": 98.46, + "learning_rate": 3.08618742943169e-07, + "loss": 0.0901, + "step": 261600 + }, + { + "epoch": 98.49, + "learning_rate": 3.0109145652992096e-07, + "loss": 0.0909, + "step": 261700 + }, + { + "epoch": 98.53, + "learning_rate": 2.93564170116673e-07, + "loss": 0.0897, + "step": 261800 + }, + { + "epoch": 98.57, + "learning_rate": 2.860368837034249e-07, + "loss": 0.0907, + "step": 261900 + }, + { + "epoch": 98.61, + "learning_rate": 2.785095972901769e-07, + "loss": 0.0885, + "step": 262000 + }, + { + "epoch": 98.65, + "learning_rate": 2.709823108769289e-07, + "loss": 0.0898, + "step": 262100 + }, + { + "epoch": 98.68, + "learning_rate": 2.634550244636809e-07, + "loss": 0.0897, + "step": 262200 + }, + { + "epoch": 98.72, + "learning_rate": 2.559277380504328e-07, + "loss": 0.0887, + "step": 262300 + }, + { + "epoch": 98.76, + "learning_rate": 2.484004516371848e-07, + "loss": 0.0888, + "step": 262400 + }, + { + "epoch": 98.8, + "learning_rate": 2.408731652239368e-07, + "loss": 0.0897, + "step": 262500 + }, + { + "epoch": 98.83, + "learning_rate": 2.3334587881068876e-07, + "loss": 0.0894, + "step": 262600 + }, + { + "epoch": 98.87, + "learning_rate": 2.2581859239744072e-07, + "loss": 0.0902, + "step": 262700 + }, + { + "epoch": 98.91, + "learning_rate": 2.1829130598419273e-07, + "loss": 0.0898, + "step": 262800 + }, + { + "epoch": 98.95, + "learning_rate": 2.1076401957094469e-07, + "loss": 0.0908, + "step": 262900 + }, + { + "epoch": 98.98, + "learning_rate": 2.0323673315769667e-07, + "loss": 0.0881, + "step": 263000 + }, + { + "epoch": 99.0, + "eval_loss": 0.08917281776666641, + "eval_runtime": 43.4811, + "eval_samples_per_second": 172.489, + "eval_steps_per_second": 10.786, + "step": 263043 + }, + { + "epoch": 99.02, + "learning_rate": 1.9570944674444863e-07, + "loss": 0.0909, + "step": 263100 + }, + { + "epoch": 99.06, + "learning_rate": 1.8818216033120064e-07, + "loss": 0.0888, + "step": 263200 + }, + { + "epoch": 99.1, + "learning_rate": 1.806548739179526e-07, + "loss": 0.0897, + "step": 263300 + }, + { + "epoch": 99.13, + "learning_rate": 1.7312758750470455e-07, + "loss": 0.0897, + "step": 263400 + }, + { + "epoch": 99.17, + "learning_rate": 1.6560030109145656e-07, + "loss": 0.0882, + "step": 263500 + }, + { + "epoch": 99.21, + "learning_rate": 1.5807301467820852e-07, + "loss": 0.0889, + "step": 263600 + }, + { + "epoch": 99.25, + "learning_rate": 1.5054572826496048e-07, + "loss": 0.0908, + "step": 263700 + }, + { + "epoch": 99.28, + "learning_rate": 1.4301844185171246e-07, + "loss": 0.088, + "step": 263800 + }, + { + "epoch": 99.32, + "learning_rate": 1.3549115543846445e-07, + "loss": 0.0876, + "step": 263900 + }, + { + "epoch": 99.36, + "learning_rate": 1.279638690252164e-07, + "loss": 0.088, + "step": 264000 + }, + { + "epoch": 99.4, + "learning_rate": 1.204365826119684e-07, + "loss": 0.0892, + "step": 264100 + }, + { + "epoch": 99.44, + "learning_rate": 1.1290929619872036e-07, + "loss": 0.0895, + "step": 264200 + }, + { + "epoch": 99.47, + "learning_rate": 1.0538200978547234e-07, + "loss": 0.0899, + "step": 264300 + }, + { + "epoch": 99.51, + "learning_rate": 9.785472337222431e-08, + "loss": 0.089, + "step": 264400 + }, + { + "epoch": 99.55, + "learning_rate": 9.03274369589763e-08, + "loss": 0.0895, + "step": 264500 + }, + { + "epoch": 99.59, + "learning_rate": 8.280015054572828e-08, + "loss": 0.091, + "step": 264600 + }, + { + "epoch": 99.62, + "learning_rate": 7.527286413248024e-08, + "loss": 0.0896, + "step": 264700 + }, + { + "epoch": 99.66, + "learning_rate": 6.774557771923222e-08, + "loss": 0.0894, + "step": 264800 + }, + { + "epoch": 99.7, + "learning_rate": 6.02182913059842e-08, + "loss": 0.0898, + "step": 264900 + }, + { + "epoch": 99.74, + "learning_rate": 5.269100489273617e-08, + "loss": 0.0882, + "step": 265000 + }, + { + "epoch": 99.77, + "learning_rate": 4.516371847948815e-08, + "loss": 0.0901, + "step": 265100 + }, + { + "epoch": 99.81, + "learning_rate": 3.763643206624012e-08, + "loss": 0.0885, + "step": 265200 + }, + { + "epoch": 99.85, + "learning_rate": 3.01091456529921e-08, + "loss": 0.0901, + "step": 265300 + }, + { + "epoch": 99.89, + "learning_rate": 2.2581859239744074e-08, + "loss": 0.0897, + "step": 265400 + }, + { + "epoch": 99.92, + "learning_rate": 1.505457282649605e-08, + "loss": 0.0902, + "step": 265500 + }, + { + "epoch": 99.96, + "learning_rate": 7.527286413248024e-09, + "loss": 0.0911, + "step": 265600 + }, + { + "epoch": 100.0, + "learning_rate": 0.0, + "loss": 0.09, + "step": 265700 + }, + { + "epoch": 100.0, + "eval_loss": 0.08935302495956421, + "eval_runtime": 44.5127, + "eval_samples_per_second": 168.491, + "eval_steps_per_second": 10.536, + "step": 265700 + }, + { + "epoch": 100.0, + "step": 265700, + "total_flos": 3.31604966375424e+20, + "train_loss": 0.10943094944119408, + "train_runtime": 65782.603, + "train_samples_per_second": 64.607, + "train_steps_per_second": 4.039 + } + ], + "max_steps": 265700, + "num_train_epochs": 100, + "total_flos": 3.31604966375424e+20, + "trial_name": null, + "trial_params": null +}