{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 2500, "global_step": 26400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.4848484848484856e-07, "loss": 20.3359, "step": 50 }, { "epoch": 0.02, "learning_rate": 7.272727272727273e-07, "loss": 17.6992, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.0984848484848485e-06, "loss": 18.4227, "step": 150 }, { "epoch": 0.05, "learning_rate": 1.4696969696969698e-06, "loss": 17.7956, "step": 200 }, { "epoch": 0.06, "learning_rate": 1.8484848484848487e-06, "loss": 16.5278, "step": 250 }, { "epoch": 0.07, "learning_rate": 2.2272727272727274e-06, "loss": 16.4439, "step": 300 }, { "epoch": 0.08, "learning_rate": 2.6060606060606064e-06, "loss": 14.4224, "step": 350 }, { "epoch": 0.09, "learning_rate": 2.984848484848485e-06, "loss": 13.3431, "step": 400 }, { "epoch": 0.1, "learning_rate": 3.3636363636363637e-06, "loss": 11.9916, "step": 450 }, { "epoch": 0.11, "learning_rate": 3.742424242424243e-06, "loss": 10.8184, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.1212121212121215e-06, "loss": 10.1329, "step": 550 }, { "epoch": 0.14, "learning_rate": 4.5e-06, "loss": 9.9713, "step": 600 }, { "epoch": 0.15, "learning_rate": 4.878787878787879e-06, "loss": 9.165, "step": 650 }, { "epoch": 0.16, "learning_rate": 5.257575757575758e-06, "loss": 7.7716, "step": 700 }, { "epoch": 0.17, "learning_rate": 5.636363636363636e-06, "loss": 7.3042, "step": 750 }, { "epoch": 0.18, "learning_rate": 6.015151515151516e-06, "loss": 6.5469, "step": 800 }, { "epoch": 0.19, "learning_rate": 6.393939393939394e-06, "loss": 6.1649, "step": 850 }, { "epoch": 0.2, "learning_rate": 6.772727272727273e-06, "loss": 5.7559, "step": 900 }, { "epoch": 0.22, "learning_rate": 7.151515151515152e-06, "loss": 5.3576, "step": 950 }, { "epoch": 0.23, "learning_rate": 7.530303030303031e-06, "loss": 5.2641, "step": 1000 }, { "epoch": 0.24, "learning_rate": 7.909090909090909e-06, "loss": 4.8608, "step": 1050 }, { "epoch": 0.25, "learning_rate": 8.287878787878787e-06, "loss": 4.838, "step": 1100 }, { "epoch": 0.26, "learning_rate": 8.666666666666668e-06, "loss": 4.5432, "step": 1150 }, { "epoch": 0.27, "learning_rate": 9.045454545454546e-06, "loss": 4.2642, "step": 1200 }, { "epoch": 0.28, "learning_rate": 9.424242424242425e-06, "loss": 4.0826, "step": 1250 }, { "epoch": 0.3, "learning_rate": 9.803030303030304e-06, "loss": 4.216, "step": 1300 }, { "epoch": 0.31, "learning_rate": 1.0181818181818182e-05, "loss": 3.7907, "step": 1350 }, { "epoch": 0.32, "learning_rate": 1.0560606060606063e-05, "loss": 3.6822, "step": 1400 }, { "epoch": 0.33, "learning_rate": 1.0939393939393941e-05, "loss": 3.6232, "step": 1450 }, { "epoch": 0.34, "learning_rate": 1.131818181818182e-05, "loss": 3.6947, "step": 1500 }, { "epoch": 0.35, "learning_rate": 1.1696969696969697e-05, "loss": 3.5923, "step": 1550 }, { "epoch": 0.36, "learning_rate": 1.2075757575757576e-05, "loss": 3.5199, "step": 1600 }, { "epoch": 0.38, "learning_rate": 1.2454545454545454e-05, "loss": 3.47, "step": 1650 }, { "epoch": 0.39, "learning_rate": 1.2833333333333335e-05, "loss": 3.4292, "step": 1700 }, { "epoch": 0.4, "learning_rate": 1.3212121212121213e-05, "loss": 3.4521, "step": 1750 }, { "epoch": 0.41, "learning_rate": 1.3590909090909092e-05, "loss": 3.4836, "step": 1800 }, { "epoch": 0.42, "learning_rate": 1.396969696969697e-05, "loss": 3.3962, "step": 1850 }, { "epoch": 0.43, "learning_rate": 1.434848484848485e-05, "loss": 3.4031, "step": 1900 }, { "epoch": 0.44, "learning_rate": 1.4727272727272728e-05, "loss": 3.427, "step": 1950 }, { "epoch": 0.45, "learning_rate": 1.5106060606060607e-05, "loss": 3.3832, "step": 2000 }, { "epoch": 0.47, "learning_rate": 1.548484848484849e-05, "loss": 3.4535, "step": 2050 }, { "epoch": 0.48, "learning_rate": 1.5863636363636364e-05, "loss": 3.3963, "step": 2100 }, { "epoch": 0.49, "learning_rate": 1.6242424242424243e-05, "loss": 3.4299, "step": 2150 }, { "epoch": 0.5, "learning_rate": 1.662121212121212e-05, "loss": 3.4213, "step": 2200 }, { "epoch": 0.51, "learning_rate": 1.7e-05, "loss": 3.3911, "step": 2250 }, { "epoch": 0.52, "learning_rate": 1.737878787878788e-05, "loss": 3.3448, "step": 2300 }, { "epoch": 0.53, "learning_rate": 1.775757575757576e-05, "loss": 3.4302, "step": 2350 }, { "epoch": 0.55, "learning_rate": 1.813636363636364e-05, "loss": 3.3987, "step": 2400 }, { "epoch": 0.56, "learning_rate": 1.8515151515151518e-05, "loss": 3.3569, "step": 2450 }, { "epoch": 0.57, "learning_rate": 1.8893939393939397e-05, "loss": 3.4067, "step": 2500 }, { "epoch": 0.57, "eval_loss": 3.577014446258545, "eval_runtime": 11.1943, "eval_samples_per_second": 126.85, "eval_steps_per_second": 7.95, "eval_wer": 1.0, "step": 2500 }, { "epoch": 0.58, "learning_rate": 1.9272727272727275e-05, "loss": 3.4038, "step": 2550 }, { "epoch": 0.59, "learning_rate": 1.9651515151515154e-05, "loss": 3.4909, "step": 2600 }, { "epoch": 0.6, "learning_rate": 1.9996632996632998e-05, "loss": 3.3383, "step": 2650 }, { "epoch": 0.61, "learning_rate": 1.9954545454545455e-05, "loss": 3.5182, "step": 2700 }, { "epoch": 0.62, "learning_rate": 1.9912457912457913e-05, "loss": 3.368, "step": 2750 }, { "epoch": 0.64, "learning_rate": 1.987037037037037e-05, "loss": 3.369, "step": 2800 }, { "epoch": 0.65, "learning_rate": 1.982828282828283e-05, "loss": 3.3429, "step": 2850 }, { "epoch": 0.66, "learning_rate": 1.978619528619529e-05, "loss": 3.428, "step": 2900 }, { "epoch": 0.67, "learning_rate": 1.9744107744107747e-05, "loss": 3.414, "step": 2950 }, { "epoch": 0.68, "learning_rate": 1.9702020202020204e-05, "loss": 3.3328, "step": 3000 }, { "epoch": 0.69, "learning_rate": 1.965993265993266e-05, "loss": 3.3332, "step": 3050 }, { "epoch": 0.7, "learning_rate": 1.961784511784512e-05, "loss": 3.4696, "step": 3100 }, { "epoch": 0.72, "learning_rate": 1.9575757575757577e-05, "loss": 3.4222, "step": 3150 }, { "epoch": 0.73, "learning_rate": 1.9533670033670034e-05, "loss": 3.3857, "step": 3200 }, { "epoch": 0.74, "learning_rate": 1.9491582491582492e-05, "loss": 3.4623, "step": 3250 }, { "epoch": 0.75, "learning_rate": 1.9449494949494953e-05, "loss": 3.5146, "step": 3300 }, { "epoch": 0.76, "learning_rate": 1.9407407407407407e-05, "loss": 3.4005, "step": 3350 }, { "epoch": 0.77, "learning_rate": 1.936616161616162e-05, "loss": 3.3341, "step": 3400 }, { "epoch": 0.78, "learning_rate": 1.9324074074074074e-05, "loss": 3.4664, "step": 3450 }, { "epoch": 0.8, "learning_rate": 1.9281986531986535e-05, "loss": 3.3314, "step": 3500 }, { "epoch": 0.81, "learning_rate": 1.9239898989898992e-05, "loss": 3.479, "step": 3550 }, { "epoch": 0.82, "learning_rate": 1.919781144781145e-05, "loss": 3.3813, "step": 3600 }, { "epoch": 0.83, "learning_rate": 1.9155723905723907e-05, "loss": 3.2613, "step": 3650 }, { "epoch": 0.84, "learning_rate": 1.9113636363636365e-05, "loss": 3.2466, "step": 3700 }, { "epoch": 0.85, "learning_rate": 1.9071548821548823e-05, "loss": 3.2185, "step": 3750 }, { "epoch": 0.86, "learning_rate": 1.902946127946128e-05, "loss": 3.2956, "step": 3800 }, { "epoch": 0.88, "learning_rate": 1.898737373737374e-05, "loss": 3.1443, "step": 3850 }, { "epoch": 0.89, "learning_rate": 1.8945286195286195e-05, "loss": 3.1665, "step": 3900 }, { "epoch": 0.9, "learning_rate": 1.8903198653198656e-05, "loss": 3.2572, "step": 3950 }, { "epoch": 0.91, "learning_rate": 1.8861111111111114e-05, "loss": 3.0124, "step": 4000 }, { "epoch": 0.92, "learning_rate": 1.881902356902357e-05, "loss": 2.9797, "step": 4050 }, { "epoch": 0.93, "learning_rate": 1.877693602693603e-05, "loss": 2.9749, "step": 4100 }, { "epoch": 0.94, "learning_rate": 1.8734848484848486e-05, "loss": 2.9438, "step": 4150 }, { "epoch": 0.95, "learning_rate": 1.8692760942760944e-05, "loss": 2.9927, "step": 4200 }, { "epoch": 0.97, "learning_rate": 1.86506734006734e-05, "loss": 2.9932, "step": 4250 }, { "epoch": 0.98, "learning_rate": 1.860858585858586e-05, "loss": 2.8843, "step": 4300 }, { "epoch": 0.99, "learning_rate": 1.8566498316498317e-05, "loss": 3.0349, "step": 4350 }, { "epoch": 1.0, "learning_rate": 1.8524410774410774e-05, "loss": 3.0206, "step": 4400 }, { "epoch": 1.01, "learning_rate": 1.8482323232323235e-05, "loss": 3.1372, "step": 4450 }, { "epoch": 1.02, "learning_rate": 1.8440235690235693e-05, "loss": 3.0921, "step": 4500 }, { "epoch": 1.03, "learning_rate": 1.839814814814815e-05, "loss": 2.8954, "step": 4550 }, { "epoch": 1.05, "learning_rate": 1.8356060606060608e-05, "loss": 2.886, "step": 4600 }, { "epoch": 1.06, "learning_rate": 1.8313973063973065e-05, "loss": 2.9639, "step": 4650 }, { "epoch": 1.07, "learning_rate": 1.8271885521885523e-05, "loss": 2.6322, "step": 4700 }, { "epoch": 1.08, "learning_rate": 1.822979797979798e-05, "loss": 2.5935, "step": 4750 }, { "epoch": 1.09, "learning_rate": 1.818771043771044e-05, "loss": 2.811, "step": 4800 }, { "epoch": 1.1, "learning_rate": 1.8145622895622895e-05, "loss": 2.4714, "step": 4850 }, { "epoch": 1.11, "learning_rate": 1.8104377104377108e-05, "loss": 2.4339, "step": 4900 }, { "epoch": 1.12, "learning_rate": 1.8062289562289562e-05, "loss": 2.3928, "step": 4950 }, { "epoch": 1.14, "learning_rate": 1.8020202020202023e-05, "loss": 2.3866, "step": 5000 }, { "epoch": 1.14, "eval_loss": 2.1913039684295654, "eval_runtime": 10.1283, "eval_samples_per_second": 140.202, "eval_steps_per_second": 8.787, "eval_wer": 1.0235998910774258, "step": 5000 }, { "epoch": 1.15, "learning_rate": 1.7978114478114477e-05, "loss": 2.3114, "step": 5050 }, { "epoch": 1.16, "learning_rate": 1.793602693602694e-05, "loss": 2.6422, "step": 5100 }, { "epoch": 1.17, "learning_rate": 1.7893939393939396e-05, "loss": 2.2676, "step": 5150 }, { "epoch": 1.18, "learning_rate": 1.7851851851851853e-05, "loss": 2.2543, "step": 5200 }, { "epoch": 1.19, "learning_rate": 1.780976430976431e-05, "loss": 2.5168, "step": 5250 }, { "epoch": 1.2, "learning_rate": 1.776767676767677e-05, "loss": 2.2676, "step": 5300 }, { "epoch": 1.22, "learning_rate": 1.772558922558923e-05, "loss": 2.159, "step": 5350 }, { "epoch": 1.23, "learning_rate": 1.7683501683501684e-05, "loss": 2.1864, "step": 5400 }, { "epoch": 1.24, "learning_rate": 1.7641414141414145e-05, "loss": 2.1289, "step": 5450 }, { "epoch": 1.25, "learning_rate": 1.7599326599326602e-05, "loss": 2.1965, "step": 5500 }, { "epoch": 1.26, "learning_rate": 1.755723905723906e-05, "loss": 2.1027, "step": 5550 }, { "epoch": 1.27, "learning_rate": 1.7515151515151517e-05, "loss": 2.0938, "step": 5600 }, { "epoch": 1.28, "learning_rate": 1.7473063973063975e-05, "loss": 1.8391, "step": 5650 }, { "epoch": 1.3, "learning_rate": 1.7430976430976432e-05, "loss": 1.7619, "step": 5700 }, { "epoch": 1.31, "learning_rate": 1.738888888888889e-05, "loss": 2.0289, "step": 5750 }, { "epoch": 1.32, "learning_rate": 1.7346801346801347e-05, "loss": 1.8786, "step": 5800 }, { "epoch": 1.33, "learning_rate": 1.7304713804713805e-05, "loss": 1.8296, "step": 5850 }, { "epoch": 1.34, "learning_rate": 1.7262626262626263e-05, "loss": 2.0458, "step": 5900 }, { "epoch": 1.35, "learning_rate": 1.7220538720538724e-05, "loss": 1.6771, "step": 5950 }, { "epoch": 1.36, "learning_rate": 1.717845117845118e-05, "loss": 1.7184, "step": 6000 }, { "epoch": 1.38, "learning_rate": 1.713636363636364e-05, "loss": 1.6993, "step": 6050 }, { "epoch": 1.39, "learning_rate": 1.7094276094276096e-05, "loss": 1.5734, "step": 6100 }, { "epoch": 1.4, "learning_rate": 1.7052188552188554e-05, "loss": 1.5645, "step": 6150 }, { "epoch": 1.41, "learning_rate": 1.701010101010101e-05, "loss": 1.5143, "step": 6200 }, { "epoch": 1.42, "learning_rate": 1.696801346801347e-05, "loss": 2.114, "step": 6250 }, { "epoch": 1.43, "learning_rate": 1.6925925925925926e-05, "loss": 1.6193, "step": 6300 }, { "epoch": 1.44, "learning_rate": 1.6883838383838384e-05, "loss": 1.5596, "step": 6350 }, { "epoch": 1.45, "learning_rate": 1.6841750841750845e-05, "loss": 1.849, "step": 6400 }, { "epoch": 1.47, "learning_rate": 1.67996632996633e-05, "loss": 1.533, "step": 6450 }, { "epoch": 1.48, "learning_rate": 1.675757575757576e-05, "loss": 1.5936, "step": 6500 }, { "epoch": 1.49, "learning_rate": 1.6715488215488218e-05, "loss": 1.9707, "step": 6550 }, { "epoch": 1.5, "learning_rate": 1.6673400673400675e-05, "loss": 1.6483, "step": 6600 }, { "epoch": 1.51, "learning_rate": 1.6631313131313133e-05, "loss": 1.6503, "step": 6650 }, { "epoch": 1.52, "learning_rate": 1.658922558922559e-05, "loss": 1.7274, "step": 6700 }, { "epoch": 1.53, "learning_rate": 1.6547138047138048e-05, "loss": 1.6686, "step": 6750 }, { "epoch": 1.55, "learning_rate": 1.6505050505050505e-05, "loss": 1.5304, "step": 6800 }, { "epoch": 1.56, "learning_rate": 1.6462962962962966e-05, "loss": 1.3636, "step": 6850 }, { "epoch": 1.57, "learning_rate": 1.642087542087542e-05, "loss": 1.2884, "step": 6900 }, { "epoch": 1.58, "learning_rate": 1.637878787878788e-05, "loss": 1.5294, "step": 6950 }, { "epoch": 1.59, "learning_rate": 1.633670033670034e-05, "loss": 1.5356, "step": 7000 }, { "epoch": 1.6, "learning_rate": 1.6294612794612796e-05, "loss": 1.3595, "step": 7050 }, { "epoch": 1.61, "learning_rate": 1.6252525252525254e-05, "loss": 1.2942, "step": 7100 }, { "epoch": 1.62, "learning_rate": 1.621043771043771e-05, "loss": 1.2498, "step": 7150 }, { "epoch": 1.64, "learning_rate": 1.616835016835017e-05, "loss": 1.2096, "step": 7200 }, { "epoch": 1.65, "learning_rate": 1.6126262626262627e-05, "loss": 1.7646, "step": 7250 }, { "epoch": 1.66, "learning_rate": 1.6084175084175088e-05, "loss": 1.209, "step": 7300 }, { "epoch": 1.67, "learning_rate": 1.6042087542087542e-05, "loss": 1.3591, "step": 7350 }, { "epoch": 1.68, "learning_rate": 1.6000000000000003e-05, "loss": 1.2967, "step": 7400 }, { "epoch": 1.69, "learning_rate": 1.595791245791246e-05, "loss": 1.2097, "step": 7450 }, { "epoch": 1.7, "learning_rate": 1.5915824915824918e-05, "loss": 1.2369, "step": 7500 }, { "epoch": 1.7, "eval_loss": 1.0120502710342407, "eval_runtime": 10.2706, "eval_samples_per_second": 138.259, "eval_steps_per_second": 8.666, "eval_wer": 0.684396841245348, "step": 7500 }, { "epoch": 1.72, "learning_rate": 1.5873737373737375e-05, "loss": 1.378, "step": 7550 }, { "epoch": 1.73, "learning_rate": 1.5831649831649833e-05, "loss": 1.1997, "step": 7600 }, { "epoch": 1.74, "learning_rate": 1.578956228956229e-05, "loss": 1.2648, "step": 7650 }, { "epoch": 1.75, "learning_rate": 1.5747474747474748e-05, "loss": 1.3954, "step": 7700 }, { "epoch": 1.76, "learning_rate": 1.5705387205387206e-05, "loss": 1.2695, "step": 7750 }, { "epoch": 1.77, "learning_rate": 1.5663299663299666e-05, "loss": 1.1902, "step": 7800 }, { "epoch": 1.78, "learning_rate": 1.562121212121212e-05, "loss": 1.2488, "step": 7850 }, { "epoch": 1.8, "learning_rate": 1.557912457912458e-05, "loss": 1.4412, "step": 7900 }, { "epoch": 1.81, "learning_rate": 1.553703703703704e-05, "loss": 1.1405, "step": 7950 }, { "epoch": 1.82, "learning_rate": 1.5494949494949497e-05, "loss": 1.1864, "step": 8000 }, { "epoch": 1.83, "learning_rate": 1.5452861952861954e-05, "loss": 1.0926, "step": 8050 }, { "epoch": 1.84, "learning_rate": 1.5410774410774412e-05, "loss": 1.0923, "step": 8100 }, { "epoch": 1.85, "learning_rate": 1.536868686868687e-05, "loss": 1.1358, "step": 8150 }, { "epoch": 1.86, "learning_rate": 1.5326599326599327e-05, "loss": 1.1179, "step": 8200 }, { "epoch": 1.88, "learning_rate": 1.5284511784511788e-05, "loss": 1.2469, "step": 8250 }, { "epoch": 1.89, "learning_rate": 1.5242424242424244e-05, "loss": 1.1038, "step": 8300 }, { "epoch": 1.9, "learning_rate": 1.5200336700336701e-05, "loss": 1.0795, "step": 8350 }, { "epoch": 1.91, "learning_rate": 1.5158249158249159e-05, "loss": 1.2395, "step": 8400 }, { "epoch": 1.92, "learning_rate": 1.5116161616161618e-05, "loss": 1.2588, "step": 8450 }, { "epoch": 1.93, "learning_rate": 1.5074074074074074e-05, "loss": 1.1212, "step": 8500 }, { "epoch": 1.94, "learning_rate": 1.5031986531986533e-05, "loss": 1.3742, "step": 8550 }, { "epoch": 1.95, "learning_rate": 1.4989898989898992e-05, "loss": 1.3141, "step": 8600 }, { "epoch": 1.97, "learning_rate": 1.4947811447811448e-05, "loss": 1.1348, "step": 8650 }, { "epoch": 1.98, "learning_rate": 1.4905723905723907e-05, "loss": 1.027, "step": 8700 }, { "epoch": 1.99, "learning_rate": 1.4863636363636365e-05, "loss": 1.5249, "step": 8750 }, { "epoch": 2.0, "learning_rate": 1.4821548821548824e-05, "loss": 1.0067, "step": 8800 }, { "epoch": 2.01, "learning_rate": 1.477946127946128e-05, "loss": 1.1649, "step": 8850 }, { "epoch": 2.02, "learning_rate": 1.473737373737374e-05, "loss": 1.1166, "step": 8900 }, { "epoch": 2.03, "learning_rate": 1.4695286195286195e-05, "loss": 1.1776, "step": 8950 }, { "epoch": 2.05, "learning_rate": 1.4653198653198654e-05, "loss": 1.188, "step": 9000 }, { "epoch": 2.06, "learning_rate": 1.4611111111111112e-05, "loss": 1.0091, "step": 9050 }, { "epoch": 2.07, "learning_rate": 1.456902356902357e-05, "loss": 1.0536, "step": 9100 }, { "epoch": 2.08, "learning_rate": 1.4526936026936027e-05, "loss": 1.0909, "step": 9150 }, { "epoch": 2.09, "learning_rate": 1.4484848484848486e-05, "loss": 1.0125, "step": 9200 }, { "epoch": 2.1, "learning_rate": 1.4442760942760946e-05, "loss": 1.3341, "step": 9250 }, { "epoch": 2.11, "learning_rate": 1.4400673400673401e-05, "loss": 1.0577, "step": 9300 }, { "epoch": 2.12, "learning_rate": 1.435858585858586e-05, "loss": 1.1016, "step": 9350 }, { "epoch": 2.14, "learning_rate": 1.4316498316498317e-05, "loss": 1.2768, "step": 9400 }, { "epoch": 2.15, "learning_rate": 1.4274410774410776e-05, "loss": 1.0938, "step": 9450 }, { "epoch": 2.16, "learning_rate": 1.4232323232323233e-05, "loss": 1.1356, "step": 9500 }, { "epoch": 2.17, "learning_rate": 1.4190235690235693e-05, "loss": 1.0515, "step": 9550 }, { "epoch": 2.18, "learning_rate": 1.4148148148148148e-05, "loss": 0.9527, "step": 9600 }, { "epoch": 2.19, "learning_rate": 1.4106060606060608e-05, "loss": 1.7257, "step": 9650 }, { "epoch": 2.2, "learning_rate": 1.4063973063973064e-05, "loss": 1.129, "step": 9700 }, { "epoch": 2.22, "learning_rate": 1.4021885521885523e-05, "loss": 0.9502, "step": 9750 }, { "epoch": 2.23, "learning_rate": 1.397979797979798e-05, "loss": 1.025, "step": 9800 }, { "epoch": 2.24, "learning_rate": 1.393771043771044e-05, "loss": 1.0832, "step": 9850 }, { "epoch": 2.25, "learning_rate": 1.3896464646464647e-05, "loss": 0.9857, "step": 9900 }, { "epoch": 2.26, "learning_rate": 1.3854377104377107e-05, "loss": 1.3888, "step": 9950 }, { "epoch": 2.27, "learning_rate": 1.3812289562289562e-05, "loss": 1.3609, "step": 10000 }, { "epoch": 2.27, "eval_loss": 0.9048006534576416, "eval_runtime": 10.2795, "eval_samples_per_second": 138.138, "eval_steps_per_second": 8.658, "eval_wer": 0.5847326858491423, "step": 10000 }, { "epoch": 2.28, "learning_rate": 1.3770202020202022e-05, "loss": 0.9839, "step": 10050 }, { "epoch": 2.3, "learning_rate": 1.3728114478114477e-05, "loss": 1.0667, "step": 10100 }, { "epoch": 2.31, "learning_rate": 1.3686026936026937e-05, "loss": 1.337, "step": 10150 }, { "epoch": 2.32, "learning_rate": 1.3643939393939396e-05, "loss": 0.9717, "step": 10200 }, { "epoch": 2.33, "learning_rate": 1.3601851851851854e-05, "loss": 0.9965, "step": 10250 }, { "epoch": 2.34, "learning_rate": 1.3559764309764311e-05, "loss": 1.1371, "step": 10300 }, { "epoch": 2.35, "learning_rate": 1.3517676767676769e-05, "loss": 1.376, "step": 10350 }, { "epoch": 2.36, "learning_rate": 1.3475589225589228e-05, "loss": 1.0109, "step": 10400 }, { "epoch": 2.38, "learning_rate": 1.3433501683501684e-05, "loss": 1.0311, "step": 10450 }, { "epoch": 2.39, "learning_rate": 1.3391414141414143e-05, "loss": 1.2125, "step": 10500 }, { "epoch": 2.4, "learning_rate": 1.33493265993266e-05, "loss": 1.4322, "step": 10550 }, { "epoch": 2.41, "learning_rate": 1.3307239057239058e-05, "loss": 0.9145, "step": 10600 }, { "epoch": 2.42, "learning_rate": 1.3265151515151516e-05, "loss": 1.2237, "step": 10650 }, { "epoch": 2.43, "learning_rate": 1.3223063973063975e-05, "loss": 1.0428, "step": 10700 }, { "epoch": 2.44, "learning_rate": 1.318097643097643e-05, "loss": 0.9095, "step": 10750 }, { "epoch": 2.45, "learning_rate": 1.313888888888889e-05, "loss": 1.1105, "step": 10800 }, { "epoch": 2.47, "learning_rate": 1.309680134680135e-05, "loss": 1.2221, "step": 10850 }, { "epoch": 2.48, "learning_rate": 1.3054713804713805e-05, "loss": 0.8747, "step": 10900 }, { "epoch": 2.49, "learning_rate": 1.3012626262626264e-05, "loss": 0.9897, "step": 10950 }, { "epoch": 2.5, "learning_rate": 1.2970538720538722e-05, "loss": 1.0568, "step": 11000 }, { "epoch": 2.51, "learning_rate": 1.292845117845118e-05, "loss": 1.1523, "step": 11050 }, { "epoch": 2.52, "learning_rate": 1.2886363636363637e-05, "loss": 1.1768, "step": 11100 }, { "epoch": 2.53, "learning_rate": 1.2844276094276096e-05, "loss": 0.9121, "step": 11150 }, { "epoch": 2.55, "learning_rate": 1.2802188552188552e-05, "loss": 0.8622, "step": 11200 }, { "epoch": 2.56, "learning_rate": 1.2760101010101011e-05, "loss": 0.9122, "step": 11250 }, { "epoch": 2.57, "learning_rate": 1.2718013468013469e-05, "loss": 0.9617, "step": 11300 }, { "epoch": 2.58, "learning_rate": 1.2675925925925926e-05, "loss": 1.0195, "step": 11350 }, { "epoch": 2.59, "learning_rate": 1.2633838383838384e-05, "loss": 1.077, "step": 11400 }, { "epoch": 2.6, "learning_rate": 1.2591750841750843e-05, "loss": 1.1254, "step": 11450 }, { "epoch": 2.61, "learning_rate": 1.2549663299663302e-05, "loss": 0.923, "step": 11500 }, { "epoch": 2.62, "learning_rate": 1.2507575757575758e-05, "loss": 1.2215, "step": 11550 }, { "epoch": 2.64, "learning_rate": 1.2465488215488218e-05, "loss": 1.043, "step": 11600 }, { "epoch": 2.65, "learning_rate": 1.2423400673400673e-05, "loss": 1.1023, "step": 11650 }, { "epoch": 2.66, "learning_rate": 1.2381313131313133e-05, "loss": 1.0038, "step": 11700 }, { "epoch": 2.67, "learning_rate": 1.233922558922559e-05, "loss": 1.0056, "step": 11750 }, { "epoch": 2.68, "learning_rate": 1.2297138047138048e-05, "loss": 1.0069, "step": 11800 }, { "epoch": 2.69, "learning_rate": 1.2255050505050505e-05, "loss": 0.8525, "step": 11850 }, { "epoch": 2.7, "learning_rate": 1.2212962962962965e-05, "loss": 1.1088, "step": 11900 }, { "epoch": 2.72, "learning_rate": 1.217087542087542e-05, "loss": 1.0916, "step": 11950 }, { "epoch": 2.73, "learning_rate": 1.212878787878788e-05, "loss": 0.9322, "step": 12000 }, { "epoch": 2.74, "learning_rate": 1.2086700336700337e-05, "loss": 1.0906, "step": 12050 }, { "epoch": 2.75, "learning_rate": 1.2044612794612795e-05, "loss": 0.8504, "step": 12100 }, { "epoch": 2.76, "learning_rate": 1.2002525252525254e-05, "loss": 1.1775, "step": 12150 }, { "epoch": 2.77, "learning_rate": 1.1960437710437712e-05, "loss": 1.1065, "step": 12200 }, { "epoch": 2.78, "learning_rate": 1.191835016835017e-05, "loss": 1.0454, "step": 12250 }, { "epoch": 2.8, "learning_rate": 1.1876262626262627e-05, "loss": 0.8482, "step": 12300 }, { "epoch": 2.81, "learning_rate": 1.1834175084175086e-05, "loss": 1.058, "step": 12350 }, { "epoch": 2.82, "learning_rate": 1.1792087542087542e-05, "loss": 0.871, "step": 12400 }, { "epoch": 2.83, "learning_rate": 1.1750000000000001e-05, "loss": 0.8323, "step": 12450 }, { "epoch": 2.84, "learning_rate": 1.1707912457912459e-05, "loss": 1.0586, "step": 12500 }, { "epoch": 2.84, "eval_loss": 0.8494200110435486, "eval_runtime": 11.4297, "eval_samples_per_second": 124.238, "eval_steps_per_second": 7.787, "eval_wer": 0.5447036398293547, "step": 12500 }, { "epoch": 2.85, "learning_rate": 1.1665824915824918e-05, "loss": 0.8632, "step": 12550 }, { "epoch": 2.86, "learning_rate": 1.1623737373737374e-05, "loss": 0.8505, "step": 12600 }, { "epoch": 2.88, "learning_rate": 1.1581649831649833e-05, "loss": 0.8234, "step": 12650 }, { "epoch": 2.89, "learning_rate": 1.1539562289562289e-05, "loss": 0.9053, "step": 12700 }, { "epoch": 2.9, "learning_rate": 1.1497474747474748e-05, "loss": 0.9149, "step": 12750 }, { "epoch": 2.91, "learning_rate": 1.1455387205387207e-05, "loss": 1.2585, "step": 12800 }, { "epoch": 2.92, "learning_rate": 1.1413299663299665e-05, "loss": 0.8888, "step": 12850 }, { "epoch": 2.93, "learning_rate": 1.1371212121212122e-05, "loss": 1.2228, "step": 12900 }, { "epoch": 2.94, "learning_rate": 1.132912457912458e-05, "loss": 1.0795, "step": 12950 }, { "epoch": 2.95, "learning_rate": 1.1287037037037039e-05, "loss": 0.9032, "step": 13000 }, { "epoch": 2.97, "learning_rate": 1.1244949494949495e-05, "loss": 0.8873, "step": 13050 }, { "epoch": 2.98, "learning_rate": 1.1202861952861954e-05, "loss": 0.865, "step": 13100 }, { "epoch": 2.99, "learning_rate": 1.116077441077441e-05, "loss": 1.0847, "step": 13150 }, { "epoch": 3.0, "learning_rate": 1.111868686868687e-05, "loss": 1.1395, "step": 13200 }, { "epoch": 3.01, "learning_rate": 1.1076599326599327e-05, "loss": 0.9799, "step": 13250 }, { "epoch": 3.02, "learning_rate": 1.1034511784511786e-05, "loss": 1.0804, "step": 13300 }, { "epoch": 3.03, "learning_rate": 1.0992424242424242e-05, "loss": 0.8461, "step": 13350 }, { "epoch": 3.05, "learning_rate": 1.0951178451178453e-05, "loss": 0.9993, "step": 13400 }, { "epoch": 3.06, "learning_rate": 1.0909090909090909e-05, "loss": 0.968, "step": 13450 }, { "epoch": 3.07, "learning_rate": 1.0867003367003368e-05, "loss": 1.0542, "step": 13500 }, { "epoch": 3.08, "learning_rate": 1.0824915824915824e-05, "loss": 0.8753, "step": 13550 }, { "epoch": 3.09, "learning_rate": 1.0782828282828283e-05, "loss": 1.1644, "step": 13600 }, { "epoch": 3.1, "learning_rate": 1.0740740740740742e-05, "loss": 0.8546, "step": 13650 }, { "epoch": 3.11, "learning_rate": 1.06986531986532e-05, "loss": 0.965, "step": 13700 }, { "epoch": 3.12, "learning_rate": 1.0656565656565658e-05, "loss": 0.8564, "step": 13750 }, { "epoch": 3.14, "learning_rate": 1.0614478114478115e-05, "loss": 0.9246, "step": 13800 }, { "epoch": 3.15, "learning_rate": 1.0572390572390574e-05, "loss": 0.9072, "step": 13850 }, { "epoch": 3.16, "learning_rate": 1.053030303030303e-05, "loss": 0.9194, "step": 13900 }, { "epoch": 3.17, "learning_rate": 1.048821548821549e-05, "loss": 0.8967, "step": 13950 }, { "epoch": 3.18, "learning_rate": 1.0446127946127947e-05, "loss": 0.8436, "step": 14000 }, { "epoch": 3.19, "learning_rate": 1.0404040404040405e-05, "loss": 0.8022, "step": 14050 }, { "epoch": 3.2, "learning_rate": 1.0361952861952862e-05, "loss": 1.08, "step": 14100 }, { "epoch": 3.22, "learning_rate": 1.0319865319865321e-05, "loss": 1.3585, "step": 14150 }, { "epoch": 3.23, "learning_rate": 1.0277777777777777e-05, "loss": 1.0101, "step": 14200 }, { "epoch": 3.24, "learning_rate": 1.0235690235690236e-05, "loss": 0.9023, "step": 14250 }, { "epoch": 3.25, "learning_rate": 1.0193602693602696e-05, "loss": 0.8531, "step": 14300 }, { "epoch": 3.26, "learning_rate": 1.0151515151515152e-05, "loss": 0.9215, "step": 14350 }, { "epoch": 3.27, "learning_rate": 1.010942760942761e-05, "loss": 0.8091, "step": 14400 }, { "epoch": 3.28, "learning_rate": 1.0067340067340068e-05, "loss": 0.7762, "step": 14450 }, { "epoch": 3.3, "learning_rate": 1.0025252525252526e-05, "loss": 1.0523, "step": 14500 }, { "epoch": 3.31, "learning_rate": 9.983164983164983e-06, "loss": 0.9758, "step": 14550 }, { "epoch": 3.32, "learning_rate": 9.941077441077443e-06, "loss": 0.9659, "step": 14600 }, { "epoch": 3.33, "learning_rate": 9.8989898989899e-06, "loss": 0.9322, "step": 14650 }, { "epoch": 3.34, "learning_rate": 9.856902356902358e-06, "loss": 1.2675, "step": 14700 }, { "epoch": 3.35, "learning_rate": 9.814814814814815e-06, "loss": 0.8249, "step": 14750 }, { "epoch": 3.36, "learning_rate": 9.772727272727273e-06, "loss": 0.8631, "step": 14800 }, { "epoch": 3.38, "learning_rate": 9.730639730639732e-06, "loss": 0.8615, "step": 14850 }, { "epoch": 3.39, "learning_rate": 9.68855218855219e-06, "loss": 0.7974, "step": 14900 }, { "epoch": 3.4, "learning_rate": 9.646464646464647e-06, "loss": 0.9356, "step": 14950 }, { "epoch": 3.41, "learning_rate": 9.604377104377105e-06, "loss": 0.7728, "step": 15000 }, { "epoch": 3.41, "eval_loss": 0.818789005279541, "eval_runtime": 11.5575, "eval_samples_per_second": 122.864, "eval_steps_per_second": 7.701, "eval_wer": 0.5184714532086775, "step": 15000 }, { "epoch": 3.42, "learning_rate": 9.562289562289562e-06, "loss": 0.8074, "step": 15050 }, { "epoch": 3.43, "learning_rate": 9.52020202020202e-06, "loss": 0.7998, "step": 15100 }, { "epoch": 3.44, "learning_rate": 9.47811447811448e-06, "loss": 1.3138, "step": 15150 }, { "epoch": 3.45, "learning_rate": 9.436026936026937e-06, "loss": 0.788, "step": 15200 }, { "epoch": 3.47, "learning_rate": 9.393939393939396e-06, "loss": 1.1104, "step": 15250 }, { "epoch": 3.48, "learning_rate": 9.351851851851854e-06, "loss": 0.8238, "step": 15300 }, { "epoch": 3.49, "learning_rate": 9.309764309764311e-06, "loss": 0.9569, "step": 15350 }, { "epoch": 3.5, "learning_rate": 9.267676767676769e-06, "loss": 0.9507, "step": 15400 }, { "epoch": 3.51, "learning_rate": 9.225589225589226e-06, "loss": 0.8989, "step": 15450 }, { "epoch": 3.52, "learning_rate": 9.183501683501684e-06, "loss": 0.8032, "step": 15500 }, { "epoch": 3.53, "learning_rate": 9.141414141414143e-06, "loss": 0.7944, "step": 15550 }, { "epoch": 3.55, "learning_rate": 9.0993265993266e-06, "loss": 0.7758, "step": 15600 }, { "epoch": 3.56, "learning_rate": 9.057239057239058e-06, "loss": 0.8787, "step": 15650 }, { "epoch": 3.57, "learning_rate": 9.015151515151516e-06, "loss": 0.8037, "step": 15700 }, { "epoch": 3.58, "learning_rate": 8.973063973063973e-06, "loss": 0.7973, "step": 15750 }, { "epoch": 3.59, "learning_rate": 8.93097643097643e-06, "loss": 0.764, "step": 15800 }, { "epoch": 3.6, "learning_rate": 8.888888888888888e-06, "loss": 1.2857, "step": 15850 }, { "epoch": 3.61, "learning_rate": 8.846801346801348e-06, "loss": 0.8322, "step": 15900 }, { "epoch": 3.62, "learning_rate": 8.804713804713805e-06, "loss": 0.9887, "step": 15950 }, { "epoch": 3.64, "learning_rate": 8.762626262626264e-06, "loss": 0.8534, "step": 16000 }, { "epoch": 3.65, "learning_rate": 8.720538720538722e-06, "loss": 1.2425, "step": 16050 }, { "epoch": 3.66, "learning_rate": 8.67845117845118e-06, "loss": 1.0104, "step": 16100 }, { "epoch": 3.67, "learning_rate": 8.636363636363637e-06, "loss": 0.7788, "step": 16150 }, { "epoch": 3.68, "learning_rate": 8.594276094276095e-06, "loss": 0.7986, "step": 16200 }, { "epoch": 3.69, "learning_rate": 8.552188552188552e-06, "loss": 0.7569, "step": 16250 }, { "epoch": 3.7, "learning_rate": 8.510101010101011e-06, "loss": 0.9866, "step": 16300 }, { "epoch": 3.72, "learning_rate": 8.468013468013469e-06, "loss": 1.02, "step": 16350 }, { "epoch": 3.73, "learning_rate": 8.425925925925926e-06, "loss": 0.7364, "step": 16400 }, { "epoch": 3.74, "learning_rate": 8.383838383838384e-06, "loss": 0.7741, "step": 16450 }, { "epoch": 3.75, "learning_rate": 8.341750841750842e-06, "loss": 0.9328, "step": 16500 }, { "epoch": 3.76, "learning_rate": 8.2996632996633e-06, "loss": 0.7985, "step": 16550 }, { "epoch": 3.77, "learning_rate": 8.257575757575758e-06, "loss": 0.9203, "step": 16600 }, { "epoch": 3.78, "learning_rate": 8.215488215488216e-06, "loss": 0.8495, "step": 16650 }, { "epoch": 3.8, "learning_rate": 8.173400673400675e-06, "loss": 0.9314, "step": 16700 }, { "epoch": 3.81, "learning_rate": 8.131313131313133e-06, "loss": 0.8138, "step": 16750 }, { "epoch": 3.82, "learning_rate": 8.09006734006734e-06, "loss": 1.0835, "step": 16800 }, { "epoch": 3.83, "learning_rate": 8.0479797979798e-06, "loss": 0.775, "step": 16850 }, { "epoch": 3.84, "learning_rate": 8.005892255892257e-06, "loss": 0.8199, "step": 16900 }, { "epoch": 3.85, "learning_rate": 7.963804713804715e-06, "loss": 0.846, "step": 16950 }, { "epoch": 3.86, "learning_rate": 7.921717171717172e-06, "loss": 1.1557, "step": 17000 }, { "epoch": 3.88, "learning_rate": 7.87962962962963e-06, "loss": 0.872, "step": 17050 }, { "epoch": 3.89, "learning_rate": 7.837542087542089e-06, "loss": 0.7823, "step": 17100 }, { "epoch": 3.9, "learning_rate": 7.795454545454547e-06, "loss": 0.8558, "step": 17150 }, { "epoch": 3.91, "learning_rate": 7.753367003367004e-06, "loss": 0.7936, "step": 17200 }, { "epoch": 3.92, "learning_rate": 7.711279461279462e-06, "loss": 0.8159, "step": 17250 }, { "epoch": 3.93, "learning_rate": 7.66919191919192e-06, "loss": 0.7948, "step": 17300 }, { "epoch": 3.94, "learning_rate": 7.627104377104378e-06, "loss": 1.0692, "step": 17350 }, { "epoch": 3.95, "learning_rate": 7.585016835016835e-06, "loss": 0.7547, "step": 17400 }, { "epoch": 3.97, "learning_rate": 7.5429292929292944e-06, "loss": 0.9151, "step": 17450 }, { "epoch": 3.98, "learning_rate": 7.500841750841752e-06, "loss": 1.0428, "step": 17500 }, { "epoch": 3.98, "eval_loss": 0.7845020294189453, "eval_runtime": 11.3056, "eval_samples_per_second": 125.601, "eval_steps_per_second": 7.872, "eval_wer": 0.49614232549695925, "step": 17500 }, { "epoch": 3.99, "learning_rate": 7.4587542087542095e-06, "loss": 0.8456, "step": 17550 }, { "epoch": 4.0, "learning_rate": 7.416666666666668e-06, "loss": 0.916, "step": 17600 }, { "epoch": 4.01, "learning_rate": 7.3745791245791255e-06, "loss": 0.788, "step": 17650 }, { "epoch": 4.02, "learning_rate": 7.332491582491583e-06, "loss": 0.7842, "step": 17700 }, { "epoch": 4.03, "learning_rate": 7.2904040404040414e-06, "loss": 0.8374, "step": 17750 }, { "epoch": 4.05, "learning_rate": 7.248316498316499e-06, "loss": 0.8018, "step": 17800 }, { "epoch": 4.06, "learning_rate": 7.2062289562289565e-06, "loss": 0.8225, "step": 17850 }, { "epoch": 4.07, "learning_rate": 7.164141414141414e-06, "loss": 0.7431, "step": 17900 }, { "epoch": 4.08, "learning_rate": 7.1220538720538725e-06, "loss": 0.7692, "step": 17950 }, { "epoch": 4.09, "learning_rate": 7.07996632996633e-06, "loss": 0.8149, "step": 18000 }, { "epoch": 4.1, "learning_rate": 7.037878787878788e-06, "loss": 0.7791, "step": 18050 }, { "epoch": 4.11, "learning_rate": 6.995791245791247e-06, "loss": 0.9355, "step": 18100 }, { "epoch": 4.12, "learning_rate": 6.953703703703704e-06, "loss": 0.7711, "step": 18150 }, { "epoch": 4.14, "learning_rate": 6.911616161616163e-06, "loss": 0.8754, "step": 18200 }, { "epoch": 4.15, "learning_rate": 6.86952861952862e-06, "loss": 1.2472, "step": 18250 }, { "epoch": 4.16, "learning_rate": 6.827441077441078e-06, "loss": 0.8299, "step": 18300 }, { "epoch": 4.17, "learning_rate": 6.785353535353536e-06, "loss": 0.8129, "step": 18350 }, { "epoch": 4.18, "learning_rate": 6.743265993265994e-06, "loss": 0.9362, "step": 18400 }, { "epoch": 4.19, "learning_rate": 6.701178451178451e-06, "loss": 1.011, "step": 18450 }, { "epoch": 4.2, "learning_rate": 6.65909090909091e-06, "loss": 0.9124, "step": 18500 }, { "epoch": 4.22, "learning_rate": 6.617003367003367e-06, "loss": 0.7725, "step": 18550 }, { "epoch": 4.23, "learning_rate": 6.574915824915825e-06, "loss": 0.9075, "step": 18600 }, { "epoch": 4.24, "learning_rate": 6.532828282828283e-06, "loss": 0.8239, "step": 18650 }, { "epoch": 4.25, "learning_rate": 6.490740740740741e-06, "loss": 0.7452, "step": 18700 }, { "epoch": 4.26, "learning_rate": 6.4486531986532e-06, "loss": 0.913, "step": 18750 }, { "epoch": 4.27, "learning_rate": 6.406565656565658e-06, "loss": 0.9128, "step": 18800 }, { "epoch": 4.28, "learning_rate": 6.364478114478115e-06, "loss": 0.8531, "step": 18850 }, { "epoch": 4.3, "learning_rate": 6.322390572390574e-06, "loss": 0.752, "step": 18900 }, { "epoch": 4.31, "learning_rate": 6.280303030303031e-06, "loss": 0.7641, "step": 18950 }, { "epoch": 4.32, "learning_rate": 6.238215488215489e-06, "loss": 0.8924, "step": 19000 }, { "epoch": 4.33, "learning_rate": 6.196127946127946e-06, "loss": 0.7733, "step": 19050 }, { "epoch": 4.34, "learning_rate": 6.154040404040405e-06, "loss": 0.7289, "step": 19100 }, { "epoch": 4.35, "learning_rate": 6.111952861952862e-06, "loss": 0.9658, "step": 19150 }, { "epoch": 4.36, "learning_rate": 6.06986531986532e-06, "loss": 0.7604, "step": 19200 }, { "epoch": 4.38, "learning_rate": 6.027777777777778e-06, "loss": 0.9021, "step": 19250 }, { "epoch": 4.39, "learning_rate": 5.985690235690236e-06, "loss": 0.8838, "step": 19300 }, { "epoch": 4.4, "learning_rate": 5.943602693602693e-06, "loss": 0.7497, "step": 19350 }, { "epoch": 4.41, "learning_rate": 5.9015151515151525e-06, "loss": 0.7883, "step": 19400 }, { "epoch": 4.42, "learning_rate": 5.85942760942761e-06, "loss": 0.7691, "step": 19450 }, { "epoch": 4.43, "learning_rate": 5.8173400673400684e-06, "loss": 0.7728, "step": 19500 }, { "epoch": 4.44, "learning_rate": 5.775252525252526e-06, "loss": 0.7791, "step": 19550 }, { "epoch": 4.45, "learning_rate": 5.7331649831649835e-06, "loss": 1.0584, "step": 19600 }, { "epoch": 4.47, "learning_rate": 5.691077441077442e-06, "loss": 0.7486, "step": 19650 }, { "epoch": 4.48, "learning_rate": 5.6489898989898995e-06, "loss": 0.7224, "step": 19700 }, { "epoch": 4.49, "learning_rate": 5.606902356902357e-06, "loss": 0.7918, "step": 19750 }, { "epoch": 4.5, "learning_rate": 5.5648148148148154e-06, "loss": 0.7414, "step": 19800 }, { "epoch": 4.51, "learning_rate": 5.522727272727273e-06, "loss": 0.7344, "step": 19850 }, { "epoch": 4.52, "learning_rate": 5.4806397306397305e-06, "loss": 0.8429, "step": 19900 }, { "epoch": 4.53, "learning_rate": 5.438552188552189e-06, "loss": 0.7469, "step": 19950 }, { "epoch": 4.55, "learning_rate": 5.3964646464646465e-06, "loss": 0.8995, "step": 20000 }, { "epoch": 4.55, "eval_loss": 0.7845963835716248, "eval_runtime": 11.6887, "eval_samples_per_second": 121.485, "eval_steps_per_second": 7.614, "eval_wer": 0.48842697649087774, "step": 20000 }, { "epoch": 4.56, "learning_rate": 5.354377104377106e-06, "loss": 0.8411, "step": 20050 }, { "epoch": 4.57, "learning_rate": 5.312289562289563e-06, "loss": 0.8626, "step": 20100 }, { "epoch": 4.58, "learning_rate": 5.270202020202021e-06, "loss": 1.0514, "step": 20150 }, { "epoch": 4.59, "learning_rate": 5.228114478114479e-06, "loss": 0.918, "step": 20200 }, { "epoch": 4.6, "learning_rate": 5.186026936026937e-06, "loss": 0.7164, "step": 20250 }, { "epoch": 4.61, "learning_rate": 5.143939393939394e-06, "loss": 1.0144, "step": 20300 }, { "epoch": 4.62, "learning_rate": 5.101851851851852e-06, "loss": 0.7554, "step": 20350 }, { "epoch": 4.64, "learning_rate": 5.05976430976431e-06, "loss": 0.807, "step": 20400 }, { "epoch": 4.65, "learning_rate": 5.017676767676768e-06, "loss": 0.6884, "step": 20450 }, { "epoch": 4.66, "learning_rate": 4.975589225589225e-06, "loss": 0.8717, "step": 20500 }, { "epoch": 4.67, "learning_rate": 4.933501683501684e-06, "loss": 0.9402, "step": 20550 }, { "epoch": 4.68, "learning_rate": 4.891414141414142e-06, "loss": 0.7554, "step": 20600 }, { "epoch": 4.69, "learning_rate": 4.8493265993266e-06, "loss": 0.7661, "step": 20650 }, { "epoch": 4.7, "learning_rate": 4.807239057239057e-06, "loss": 0.7203, "step": 20700 }, { "epoch": 4.72, "learning_rate": 4.765151515151516e-06, "loss": 0.8046, "step": 20750 }, { "epoch": 4.73, "learning_rate": 4.723063973063973e-06, "loss": 0.7908, "step": 20800 }, { "epoch": 4.74, "learning_rate": 4.680976430976432e-06, "loss": 0.7247, "step": 20850 }, { "epoch": 4.75, "learning_rate": 4.638888888888889e-06, "loss": 0.8461, "step": 20900 }, { "epoch": 4.76, "learning_rate": 4.596801346801348e-06, "loss": 0.898, "step": 20950 }, { "epoch": 4.77, "learning_rate": 4.554713804713805e-06, "loss": 0.8022, "step": 21000 }, { "epoch": 4.78, "learning_rate": 4.512626262626263e-06, "loss": 0.8259, "step": 21050 }, { "epoch": 4.8, "learning_rate": 4.470538720538721e-06, "loss": 0.7305, "step": 21100 }, { "epoch": 4.81, "learning_rate": 4.428451178451179e-06, "loss": 0.7825, "step": 21150 }, { "epoch": 4.82, "learning_rate": 4.386363636363637e-06, "loss": 0.9047, "step": 21200 }, { "epoch": 4.83, "learning_rate": 4.344276094276095e-06, "loss": 0.7717, "step": 21250 }, { "epoch": 4.84, "learning_rate": 4.303030303030303e-06, "loss": 0.7306, "step": 21300 }, { "epoch": 4.85, "learning_rate": 4.2609427609427615e-06, "loss": 0.8822, "step": 21350 }, { "epoch": 4.86, "learning_rate": 4.218855218855219e-06, "loss": 0.7261, "step": 21400 }, { "epoch": 4.88, "learning_rate": 4.1767676767676774e-06, "loss": 0.7726, "step": 21450 }, { "epoch": 4.89, "learning_rate": 4.134680134680135e-06, "loss": 0.9221, "step": 21500 }, { "epoch": 4.9, "learning_rate": 4.092592592592593e-06, "loss": 0.7814, "step": 21550 }, { "epoch": 4.91, "learning_rate": 4.050505050505051e-06, "loss": 1.0285, "step": 21600 }, { "epoch": 4.92, "learning_rate": 4.0084175084175085e-06, "loss": 0.8387, "step": 21650 }, { "epoch": 4.93, "learning_rate": 3.967171717171717e-06, "loss": 1.3018, "step": 21700 }, { "epoch": 4.94, "learning_rate": 3.925084175084175e-06, "loss": 0.7864, "step": 21750 }, { "epoch": 4.95, "learning_rate": 3.882996632996634e-06, "loss": 0.7365, "step": 21800 }, { "epoch": 4.97, "learning_rate": 3.840909090909091e-06, "loss": 1.0941, "step": 21850 }, { "epoch": 4.98, "learning_rate": 3.798821548821549e-06, "loss": 1.0584, "step": 21900 }, { "epoch": 4.99, "learning_rate": 3.756734006734007e-06, "loss": 0.7464, "step": 21950 }, { "epoch": 5.0, "learning_rate": 3.714646464646465e-06, "loss": 0.8112, "step": 22000 }, { "epoch": 5.01, "learning_rate": 3.6725589225589232e-06, "loss": 0.8351, "step": 22050 }, { "epoch": 5.02, "learning_rate": 3.6304713804713808e-06, "loss": 1.0219, "step": 22100 }, { "epoch": 5.03, "learning_rate": 3.5883838383838388e-06, "loss": 0.7192, "step": 22150 }, { "epoch": 5.05, "learning_rate": 3.5462962962962967e-06, "loss": 0.7386, "step": 22200 }, { "epoch": 5.06, "learning_rate": 3.5042087542087543e-06, "loss": 0.7482, "step": 22250 }, { "epoch": 5.07, "learning_rate": 3.4621212121212123e-06, "loss": 0.8197, "step": 22300 }, { "epoch": 5.08, "learning_rate": 3.4200336700336702e-06, "loss": 0.7464, "step": 22350 }, { "epoch": 5.09, "learning_rate": 3.3779461279461286e-06, "loss": 0.8665, "step": 22400 }, { "epoch": 5.1, "learning_rate": 3.335858585858586e-06, "loss": 0.8403, "step": 22450 }, { "epoch": 5.11, "learning_rate": 3.293771043771044e-06, "loss": 0.7284, "step": 22500 }, { "epoch": 5.11, "eval_loss": 0.7877032160758972, "eval_runtime": 13.7114, "eval_samples_per_second": 103.564, "eval_steps_per_second": 6.491, "eval_wer": 0.4866116002541527, "step": 22500 }, { "epoch": 5.12, "learning_rate": 3.2516835016835017e-06, "loss": 0.7678, "step": 22550 }, { "epoch": 5.14, "learning_rate": 3.2095959595959597e-06, "loss": 0.796, "step": 22600 }, { "epoch": 5.15, "learning_rate": 3.1675084175084177e-06, "loss": 0.748, "step": 22650 }, { "epoch": 5.16, "learning_rate": 3.125420875420876e-06, "loss": 0.7264, "step": 22700 }, { "epoch": 5.17, "learning_rate": 3.0833333333333336e-06, "loss": 0.7927, "step": 22750 }, { "epoch": 5.18, "learning_rate": 3.0412457912457916e-06, "loss": 0.7805, "step": 22800 }, { "epoch": 5.19, "learning_rate": 2.9991582491582495e-06, "loss": 0.7128, "step": 22850 }, { "epoch": 5.2, "learning_rate": 2.957070707070707e-06, "loss": 0.7237, "step": 22900 }, { "epoch": 5.22, "learning_rate": 2.914983164983165e-06, "loss": 0.6766, "step": 22950 }, { "epoch": 5.23, "learning_rate": 2.8728956228956235e-06, "loss": 0.7575, "step": 23000 }, { "epoch": 5.24, "learning_rate": 2.830808080808081e-06, "loss": 0.7613, "step": 23050 }, { "epoch": 5.25, "learning_rate": 2.788720538720539e-06, "loss": 0.8067, "step": 23100 }, { "epoch": 5.26, "learning_rate": 2.746632996632997e-06, "loss": 0.7005, "step": 23150 }, { "epoch": 5.27, "learning_rate": 2.7045454545454545e-06, "loss": 0.8384, "step": 23200 }, { "epoch": 5.28, "learning_rate": 2.6624579124579125e-06, "loss": 0.8518, "step": 23250 }, { "epoch": 5.3, "learning_rate": 2.6203703703703705e-06, "loss": 0.8564, "step": 23300 }, { "epoch": 5.31, "learning_rate": 2.578282828282829e-06, "loss": 0.7896, "step": 23350 }, { "epoch": 5.32, "learning_rate": 2.5361952861952864e-06, "loss": 0.856, "step": 23400 }, { "epoch": 5.33, "learning_rate": 2.4941077441077444e-06, "loss": 0.7428, "step": 23450 }, { "epoch": 5.34, "learning_rate": 2.4520202020202024e-06, "loss": 0.8151, "step": 23500 }, { "epoch": 5.35, "learning_rate": 2.40993265993266e-06, "loss": 0.7196, "step": 23550 }, { "epoch": 5.36, "learning_rate": 2.3678451178451183e-06, "loss": 0.9005, "step": 23600 }, { "epoch": 5.38, "learning_rate": 2.325757575757576e-06, "loss": 0.7523, "step": 23650 }, { "epoch": 5.39, "learning_rate": 2.283670033670034e-06, "loss": 0.7352, "step": 23700 }, { "epoch": 5.4, "learning_rate": 2.241582491582492e-06, "loss": 0.8185, "step": 23750 }, { "epoch": 5.41, "learning_rate": 2.19949494949495e-06, "loss": 0.8663, "step": 23800 }, { "epoch": 5.42, "learning_rate": 2.1574074074074073e-06, "loss": 0.7751, "step": 23850 }, { "epoch": 5.43, "learning_rate": 2.1161616161616163e-06, "loss": 0.8707, "step": 23900 }, { "epoch": 5.44, "learning_rate": 2.0740740740740742e-06, "loss": 0.9344, "step": 23950 }, { "epoch": 5.45, "learning_rate": 2.0319865319865322e-06, "loss": 0.8397, "step": 24000 }, { "epoch": 5.47, "learning_rate": 1.98989898989899e-06, "loss": 0.7523, "step": 24050 }, { "epoch": 5.48, "learning_rate": 1.9478114478114477e-06, "loss": 0.834, "step": 24100 }, { "epoch": 5.49, "learning_rate": 1.9057239057239057e-06, "loss": 0.6954, "step": 24150 }, { "epoch": 5.5, "learning_rate": 1.863636363636364e-06, "loss": 0.8846, "step": 24200 }, { "epoch": 5.51, "learning_rate": 1.8215488215488217e-06, "loss": 0.8028, "step": 24250 }, { "epoch": 5.52, "learning_rate": 1.7794612794612794e-06, "loss": 0.7823, "step": 24300 }, { "epoch": 5.53, "learning_rate": 1.7373737373737376e-06, "loss": 1.1963, "step": 24350 }, { "epoch": 5.55, "learning_rate": 1.6952861952861954e-06, "loss": 0.7852, "step": 24400 }, { "epoch": 5.56, "learning_rate": 1.6531986531986531e-06, "loss": 0.7807, "step": 24450 }, { "epoch": 5.57, "learning_rate": 1.6111111111111113e-06, "loss": 0.736, "step": 24500 }, { "epoch": 5.58, "learning_rate": 1.569023569023569e-06, "loss": 0.7848, "step": 24550 }, { "epoch": 5.59, "learning_rate": 1.526936026936027e-06, "loss": 1.0245, "step": 24600 }, { "epoch": 5.6, "learning_rate": 1.484848484848485e-06, "loss": 0.7971, "step": 24650 }, { "epoch": 5.61, "learning_rate": 1.442760942760943e-06, "loss": 0.7535, "step": 24700 }, { "epoch": 5.62, "learning_rate": 1.4006734006734008e-06, "loss": 0.7523, "step": 24750 }, { "epoch": 5.64, "learning_rate": 1.3585858585858585e-06, "loss": 0.9734, "step": 24800 }, { "epoch": 5.65, "learning_rate": 1.3164983164983167e-06, "loss": 0.8716, "step": 24850 }, { "epoch": 5.66, "learning_rate": 1.2744107744107745e-06, "loss": 0.7526, "step": 24900 }, { "epoch": 5.67, "learning_rate": 1.2323232323232325e-06, "loss": 0.7232, "step": 24950 }, { "epoch": 5.68, "learning_rate": 1.1902356902356902e-06, "loss": 0.7733, "step": 25000 }, { "epoch": 5.68, "eval_loss": 0.7833463549613953, "eval_runtime": 29.2924, "eval_samples_per_second": 48.477, "eval_steps_per_second": 3.038, "eval_wer": 0.4842516111464101, "step": 25000 }, { "epoch": 5.69, "learning_rate": 1.1481481481481482e-06, "loss": 0.79, "step": 25050 }, { "epoch": 5.7, "learning_rate": 1.1060606060606062e-06, "loss": 0.7398, "step": 25100 }, { "epoch": 5.72, "learning_rate": 1.063973063973064e-06, "loss": 0.7473, "step": 25150 }, { "epoch": 5.73, "learning_rate": 1.021885521885522e-06, "loss": 0.7433, "step": 25200 }, { "epoch": 5.74, "learning_rate": 9.797979797979799e-07, "loss": 1.0846, "step": 25250 }, { "epoch": 5.75, "learning_rate": 9.377104377104378e-07, "loss": 0.6879, "step": 25300 }, { "epoch": 5.76, "learning_rate": 8.956228956228957e-07, "loss": 0.9003, "step": 25350 }, { "epoch": 5.77, "learning_rate": 8.535353535353535e-07, "loss": 0.8282, "step": 25400 }, { "epoch": 5.78, "learning_rate": 8.114478114478115e-07, "loss": 0.7333, "step": 25450 }, { "epoch": 5.8, "learning_rate": 7.693602693602694e-07, "loss": 0.7157, "step": 25500 }, { "epoch": 5.81, "learning_rate": 7.272727272727273e-07, "loss": 0.7324, "step": 25550 }, { "epoch": 5.82, "learning_rate": 6.851851851851853e-07, "loss": 0.8716, "step": 25600 }, { "epoch": 5.83, "learning_rate": 6.430976430976431e-07, "loss": 0.7058, "step": 25650 }, { "epoch": 5.84, "learning_rate": 6.01010101010101e-07, "loss": 0.7181, "step": 25700 }, { "epoch": 5.85, "learning_rate": 5.589225589225589e-07, "loss": 0.7017, "step": 25750 }, { "epoch": 5.86, "learning_rate": 5.168350168350169e-07, "loss": 0.7228, "step": 25800 }, { "epoch": 5.88, "learning_rate": 4.747474747474748e-07, "loss": 0.7029, "step": 25850 }, { "epoch": 5.89, "learning_rate": 4.3265993265993266e-07, "loss": 0.7339, "step": 25900 }, { "epoch": 5.9, "learning_rate": 3.905723905723906e-07, "loss": 0.7346, "step": 25950 }, { "epoch": 5.91, "learning_rate": 3.4848484848484856e-07, "loss": 0.7139, "step": 26000 }, { "epoch": 5.92, "learning_rate": 3.0639730639730643e-07, "loss": 0.9626, "step": 26050 }, { "epoch": 5.93, "learning_rate": 2.643097643097643e-07, "loss": 0.7418, "step": 26100 }, { "epoch": 5.94, "learning_rate": 2.2222222222222224e-07, "loss": 0.8973, "step": 26150 }, { "epoch": 5.95, "learning_rate": 1.8013468013468014e-07, "loss": 0.8181, "step": 26200 }, { "epoch": 5.97, "learning_rate": 1.3804713804713806e-07, "loss": 0.6879, "step": 26250 }, { "epoch": 5.98, "learning_rate": 9.595959595959596e-08, "loss": 0.9437, "step": 26300 }, { "epoch": 5.99, "learning_rate": 5.387205387205388e-08, "loss": 0.7509, "step": 26350 }, { "epoch": 6.0, "learning_rate": 1.1784511784511785e-08, "loss": 0.7601, "step": 26400 }, { "epoch": 6.0, "step": 26400, "total_flos": 3.490209998680705e+20, "train_loss": 1.7717995837240508, "train_runtime": 16288.9371, "train_samples_per_second": 64.824, "train_steps_per_second": 1.621 } ], "logging_steps": 50, "max_steps": 26400, "num_train_epochs": 6, "save_steps": 500, "total_flos": 3.490209998680705e+20, "trial_name": null, "trial_params": null }