{ "best_metric": 18.128184499865917, "best_model_checkpoint": "./logs/whisper-base-cantonese/checkpoint-7200", "epoch": 10.02875, "eval_steps": 400, "global_step": 7200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.3e-06, "loss": 2.0581, "step": 25 }, { "epoch": 0.0, "learning_rate": 4.800000000000001e-06, "loss": 0.8569, "step": 50 }, { "epoch": 0.0, "learning_rate": 7.2999999999999996e-06, "loss": 0.4243, "step": 75 }, { "epoch": 0.01, "learning_rate": 9.800000000000001e-06, "loss": 0.3528, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.23e-05, "loss": 0.3243, "step": 125 }, { "epoch": 0.01, "learning_rate": 1.48e-05, "loss": 0.3011, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.73e-05, "loss": 0.2793, "step": 175 }, { "epoch": 0.01, "learning_rate": 1.9800000000000004e-05, "loss": 0.2669, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.23e-05, "loss": 0.2676, "step": 225 }, { "epoch": 0.01, "learning_rate": 2.48e-05, "loss": 0.252, "step": 250 }, { "epoch": 0.01, "learning_rate": 2.7300000000000003e-05, "loss": 0.2484, "step": 275 }, { "epoch": 0.01, "learning_rate": 2.98e-05, "loss": 0.2327, "step": 300 }, { "epoch": 0.02, "learning_rate": 3.2300000000000006e-05, "loss": 0.235, "step": 325 }, { "epoch": 0.02, "learning_rate": 3.48e-05, "loss": 0.2229, "step": 350 }, { "epoch": 0.02, "learning_rate": 3.73e-05, "loss": 0.2272, "step": 375 }, { "epoch": 0.02, "learning_rate": 3.9800000000000005e-05, "loss": 0.2162, "step": 400 }, { "epoch": 0.02, "eval_cer": 36.56029319746134, "eval_loss": 1.0470175743103027, "eval_runtime": 127.7891, "eval_samples_per_second": 7.825, "eval_steps_per_second": 0.493, "step": 400 }, { "epoch": 0.02, "learning_rate": 4.23e-05, "loss": 0.2131, "step": 425 }, { "epoch": 0.02, "learning_rate": 4.4800000000000005e-05, "loss": 0.2135, "step": 450 }, { "epoch": 0.02, "learning_rate": 4.73e-05, "loss": 0.211, "step": 475 }, { "epoch": 0.03, "learning_rate": 4.9800000000000004e-05, "loss": 0.2157, "step": 500 }, { "epoch": 0.03, "learning_rate": 4.994102564102565e-05, "loss": 0.2063, "step": 525 }, { "epoch": 0.03, "learning_rate": 4.987692307692308e-05, "loss": 0.2048, "step": 550 }, { "epoch": 0.03, "learning_rate": 4.981282051282051e-05, "loss": 0.6392, "step": 575 }, { "epoch": 0.03, "learning_rate": 4.974871794871795e-05, "loss": 0.5431, "step": 600 }, { "epoch": 0.03, "learning_rate": 4.9684615384615384e-05, "loss": 0.515, "step": 625 }, { "epoch": 0.03, "learning_rate": 4.962051282051282e-05, "loss": 0.4156, "step": 650 }, { "epoch": 1.0, "learning_rate": 4.9556410256410256e-05, "loss": 0.3158, "step": 675 }, { "epoch": 1.0, "learning_rate": 4.9492307692307695e-05, "loss": 0.2046, "step": 700 }, { "epoch": 1.0, "learning_rate": 4.942820512820513e-05, "loss": 0.1938, "step": 725 }, { "epoch": 1.0, "learning_rate": 4.9364102564102567e-05, "loss": 0.1776, "step": 750 }, { "epoch": 1.01, "learning_rate": 4.93e-05, "loss": 0.1726, "step": 775 }, { "epoch": 1.01, "learning_rate": 4.923589743589744e-05, "loss": 0.1652, "step": 800 }, { "epoch": 1.01, "eval_cer": 26.888352552069367, "eval_loss": 0.5993894338607788, "eval_runtime": 126.7133, "eval_samples_per_second": 7.892, "eval_steps_per_second": 0.497, "step": 800 }, { "epoch": 1.01, "learning_rate": 4.917179487179488e-05, "loss": 0.162, "step": 825 }, { "epoch": 1.01, "learning_rate": 4.910769230769231e-05, "loss": 0.1467, "step": 850 }, { "epoch": 1.01, "learning_rate": 4.904358974358975e-05, "loss": 0.1524, "step": 875 }, { "epoch": 1.01, "learning_rate": 4.897948717948718e-05, "loss": 0.147, "step": 900 }, { "epoch": 1.01, "learning_rate": 4.891538461538462e-05, "loss": 0.1398, "step": 925 }, { "epoch": 1.01, "learning_rate": 4.885128205128205e-05, "loss": 0.1347, "step": 950 }, { "epoch": 1.02, "learning_rate": 4.878717948717949e-05, "loss": 0.1314, "step": 975 }, { "epoch": 1.02, "learning_rate": 4.8723076923076925e-05, "loss": 0.1215, "step": 1000 }, { "epoch": 1.02, "learning_rate": 4.8658974358974364e-05, "loss": 0.1219, "step": 1025 }, { "epoch": 1.02, "learning_rate": 4.8594871794871796e-05, "loss": 0.1199, "step": 1050 }, { "epoch": 1.02, "learning_rate": 4.8530769230769236e-05, "loss": 0.1133, "step": 1075 }, { "epoch": 1.02, "learning_rate": 4.8466666666666675e-05, "loss": 0.1101, "step": 1100 }, { "epoch": 1.02, "learning_rate": 4.840256410256411e-05, "loss": 0.1082, "step": 1125 }, { "epoch": 1.02, "learning_rate": 4.833846153846154e-05, "loss": 0.1124, "step": 1150 }, { "epoch": 1.03, "learning_rate": 4.827435897435897e-05, "loss": 0.1073, "step": 1175 }, { "epoch": 1.03, "learning_rate": 4.821025641025641e-05, "loss": 0.0978, "step": 1200 }, { "epoch": 1.03, "eval_cer": 31.79583445070171, "eval_loss": 0.7157341241836548, "eval_runtime": 131.399, "eval_samples_per_second": 7.61, "eval_steps_per_second": 0.479, "step": 1200 }, { "epoch": 1.03, "learning_rate": 4.8146153846153844e-05, "loss": 0.2522, "step": 1225 }, { "epoch": 1.03, "learning_rate": 4.808205128205128e-05, "loss": 0.3732, "step": 1250 }, { "epoch": 1.03, "learning_rate": 4.8017948717948715e-05, "loss": 0.3506, "step": 1275 }, { "epoch": 1.03, "learning_rate": 4.7953846153846154e-05, "loss": 0.2876, "step": 1300 }, { "epoch": 1.03, "learning_rate": 4.7889743589743594e-05, "loss": 0.2475, "step": 1325 }, { "epoch": 2.0, "learning_rate": 4.7825641025641026e-05, "loss": 0.1111, "step": 1350 }, { "epoch": 2.0, "learning_rate": 4.7761538461538465e-05, "loss": 0.1053, "step": 1375 }, { "epoch": 2.0, "learning_rate": 4.76974358974359e-05, "loss": 0.0915, "step": 1400 }, { "epoch": 2.0, "learning_rate": 4.763333333333334e-05, "loss": 0.0941, "step": 1425 }, { "epoch": 2.01, "learning_rate": 4.756923076923077e-05, "loss": 0.0863, "step": 1450 }, { "epoch": 2.01, "learning_rate": 4.750512820512821e-05, "loss": 0.0854, "step": 1475 }, { "epoch": 2.01, "learning_rate": 4.744102564102564e-05, "loss": 0.0806, "step": 1500 }, { "epoch": 2.01, "learning_rate": 4.737692307692308e-05, "loss": 0.0788, "step": 1525 }, { "epoch": 2.01, "learning_rate": 4.731282051282051e-05, "loss": 0.0784, "step": 1550 }, { "epoch": 2.01, "learning_rate": 4.724871794871795e-05, "loss": 0.0811, "step": 1575 }, { "epoch": 2.01, "learning_rate": 4.718461538461539e-05, "loss": 0.0769, "step": 1600 }, { "epoch": 2.01, "eval_cer": 25.914007329936535, "eval_loss": 0.6011224389076233, "eval_runtime": 131.3826, "eval_samples_per_second": 7.611, "eval_steps_per_second": 0.48, "step": 1600 }, { "epoch": 2.02, "learning_rate": 4.7120512820512823e-05, "loss": 0.069, "step": 1625 }, { "epoch": 2.02, "learning_rate": 4.705641025641026e-05, "loss": 0.0694, "step": 1650 }, { "epoch": 2.02, "learning_rate": 4.6992307692307695e-05, "loss": 0.0641, "step": 1675 }, { "epoch": 2.02, "learning_rate": 4.6928205128205134e-05, "loss": 0.0704, "step": 1700 }, { "epoch": 2.02, "learning_rate": 4.686410256410257e-05, "loss": 0.0638, "step": 1725 }, { "epoch": 2.02, "learning_rate": 4.6800000000000006e-05, "loss": 0.0612, "step": 1750 }, { "epoch": 2.02, "learning_rate": 4.673589743589744e-05, "loss": 0.062, "step": 1775 }, { "epoch": 2.02, "learning_rate": 4.667179487179487e-05, "loss": 0.0643, "step": 1800 }, { "epoch": 2.02, "learning_rate": 4.660769230769231e-05, "loss": 0.0608, "step": 1825 }, { "epoch": 2.03, "learning_rate": 4.654358974358974e-05, "loss": 0.0601, "step": 1850 }, { "epoch": 2.03, "learning_rate": 4.647948717948718e-05, "loss": 0.06, "step": 1875 }, { "epoch": 2.03, "learning_rate": 4.6415384615384614e-05, "loss": 0.2819, "step": 1900 }, { "epoch": 2.03, "learning_rate": 4.635128205128205e-05, "loss": 0.2377, "step": 1925 }, { "epoch": 2.03, "learning_rate": 4.6287179487179486e-05, "loss": 0.2326, "step": 1950 }, { "epoch": 2.03, "learning_rate": 4.6223076923076925e-05, "loss": 0.1679, "step": 1975 }, { "epoch": 3.0, "learning_rate": 4.615897435897436e-05, "loss": 0.1142, "step": 2000 }, { "epoch": 3.0, "eval_cer": 20.872441226423526, "eval_loss": 0.478270024061203, "eval_runtime": 125.9954, "eval_samples_per_second": 7.937, "eval_steps_per_second": 0.5, "step": 2000 }, { "epoch": 3.0, "learning_rate": 4.6094871794871797e-05, "loss": 0.0557, "step": 2025 }, { "epoch": 3.0, "learning_rate": 4.6030769230769236e-05, "loss": 0.0551, "step": 2050 }, { "epoch": 3.0, "learning_rate": 4.596666666666667e-05, "loss": 0.0511, "step": 2075 }, { "epoch": 3.01, "learning_rate": 4.590256410256411e-05, "loss": 0.052, "step": 2100 }, { "epoch": 3.01, "learning_rate": 4.583846153846154e-05, "loss": 0.0462, "step": 2125 }, { "epoch": 3.01, "learning_rate": 4.577435897435898e-05, "loss": 0.0468, "step": 2150 }, { "epoch": 3.01, "learning_rate": 4.5712820512820514e-05, "loss": 0.0405, "step": 2175 }, { "epoch": 3.01, "learning_rate": 4.5648717948717954e-05, "loss": 0.0449, "step": 2200 }, { "epoch": 3.01, "learning_rate": 4.5584615384615386e-05, "loss": 0.0437, "step": 2225 }, { "epoch": 3.01, "learning_rate": 4.5520512820512825e-05, "loss": 0.0456, "step": 2250 }, { "epoch": 3.01, "learning_rate": 4.545641025641026e-05, "loss": 0.0416, "step": 2275 }, { "epoch": 3.02, "learning_rate": 4.53923076923077e-05, "loss": 0.0411, "step": 2300 }, { "epoch": 3.02, "learning_rate": 4.532820512820513e-05, "loss": 0.0363, "step": 2325 }, { "epoch": 3.02, "learning_rate": 4.526410256410257e-05, "loss": 0.0378, "step": 2350 }, { "epoch": 3.02, "learning_rate": 4.52e-05, "loss": 0.038, "step": 2375 }, { "epoch": 3.02, "learning_rate": 4.513589743589744e-05, "loss": 0.037, "step": 2400 }, { "epoch": 3.02, "eval_cer": 23.688209528917493, "eval_loss": 0.5917167067527771, "eval_runtime": 129.3069, "eval_samples_per_second": 7.734, "eval_steps_per_second": 0.487, "step": 2400 }, { "epoch": 3.02, "learning_rate": 4.507179487179487e-05, "loss": 0.035, "step": 2425 }, { "epoch": 3.02, "learning_rate": 4.500769230769231e-05, "loss": 0.0372, "step": 2450 }, { "epoch": 3.02, "learning_rate": 4.494358974358975e-05, "loss": 0.04, "step": 2475 }, { "epoch": 3.03, "learning_rate": 4.4879487179487183e-05, "loss": 0.0348, "step": 2500 }, { "epoch": 3.03, "learning_rate": 4.481538461538462e-05, "loss": 0.0328, "step": 2525 }, { "epoch": 3.03, "learning_rate": 4.475128205128205e-05, "loss": 0.1175, "step": 2550 }, { "epoch": 3.03, "learning_rate": 4.468717948717949e-05, "loss": 0.1797, "step": 2575 }, { "epoch": 3.03, "learning_rate": 4.462307692307692e-05, "loss": 0.1705, "step": 2600 }, { "epoch": 3.03, "learning_rate": 4.455897435897436e-05, "loss": 0.1245, "step": 2625 }, { "epoch": 3.03, "learning_rate": 4.44948717948718e-05, "loss": 0.1061, "step": 2650 }, { "epoch": 4.0, "learning_rate": 4.443076923076923e-05, "loss": 0.0318, "step": 2675 }, { "epoch": 4.0, "learning_rate": 4.436666666666667e-05, "loss": 0.0337, "step": 2700 }, { "epoch": 4.0, "learning_rate": 4.43025641025641e-05, "loss": 0.029, "step": 2725 }, { "epoch": 4.0, "learning_rate": 4.423846153846154e-05, "loss": 0.0289, "step": 2750 }, { "epoch": 4.01, "learning_rate": 4.4174358974358974e-05, "loss": 0.0297, "step": 2775 }, { "epoch": 4.01, "learning_rate": 4.411025641025641e-05, "loss": 0.027, "step": 2800 }, { "epoch": 4.01, "eval_cer": 21.936175918476803, "eval_loss": 0.5530263185501099, "eval_runtime": 132.1424, "eval_samples_per_second": 7.568, "eval_steps_per_second": 0.477, "step": 2800 }, { "epoch": 4.01, "learning_rate": 4.4046153846153846e-05, "loss": 0.0256, "step": 2825 }, { "epoch": 4.01, "learning_rate": 4.3982051282051285e-05, "loss": 0.0253, "step": 2850 }, { "epoch": 4.01, "learning_rate": 4.391794871794872e-05, "loss": 0.0247, "step": 2875 }, { "epoch": 4.01, "learning_rate": 4.3853846153846156e-05, "loss": 0.0244, "step": 2900 }, { "epoch": 4.01, "learning_rate": 4.3789743589743596e-05, "loss": 0.0253, "step": 2925 }, { "epoch": 4.01, "learning_rate": 4.372564102564103e-05, "loss": 0.0238, "step": 2950 }, { "epoch": 4.02, "learning_rate": 4.366153846153847e-05, "loss": 0.0237, "step": 2975 }, { "epoch": 4.02, "learning_rate": 4.35974358974359e-05, "loss": 0.0209, "step": 3000 }, { "epoch": 4.02, "learning_rate": 4.353333333333334e-05, "loss": 0.0236, "step": 3025 }, { "epoch": 4.02, "learning_rate": 4.346923076923077e-05, "loss": 0.0217, "step": 3050 }, { "epoch": 4.02, "learning_rate": 4.340512820512821e-05, "loss": 0.0208, "step": 3075 }, { "epoch": 4.02, "learning_rate": 4.334102564102564e-05, "loss": 0.02, "step": 3100 }, { "epoch": 4.02, "learning_rate": 4.327692307692308e-05, "loss": 0.0222, "step": 3125 }, { "epoch": 4.03, "learning_rate": 4.3212820512820515e-05, "loss": 0.0204, "step": 3150 }, { "epoch": 4.03, "learning_rate": 4.314871794871795e-05, "loss": 0.0203, "step": 3175 }, { "epoch": 4.03, "learning_rate": 4.3084615384615386e-05, "loss": 0.0199, "step": 3200 }, { "epoch": 4.03, "eval_cer": 29.847144006436043, "eval_loss": 0.6281722187995911, "eval_runtime": 134.3999, "eval_samples_per_second": 7.44, "eval_steps_per_second": 0.469, "step": 3200 }, { "epoch": 4.03, "learning_rate": 4.302051282051282e-05, "loss": 0.1392, "step": 3225 }, { "epoch": 4.03, "learning_rate": 4.295641025641026e-05, "loss": 0.1092, "step": 3250 }, { "epoch": 4.03, "learning_rate": 4.289230769230769e-05, "loss": 0.1003, "step": 3275 }, { "epoch": 4.03, "learning_rate": 4.282820512820513e-05, "loss": 0.0684, "step": 3300 }, { "epoch": 5.0, "learning_rate": 4.276410256410256e-05, "loss": 0.0438, "step": 3325 }, { "epoch": 5.0, "learning_rate": 4.27e-05, "loss": 0.0163, "step": 3350 }, { "epoch": 5.0, "learning_rate": 4.263589743589744e-05, "loss": 0.0179, "step": 3375 }, { "epoch": 5.0, "learning_rate": 4.257179487179487e-05, "loss": 0.0178, "step": 3400 }, { "epoch": 5.01, "learning_rate": 4.250769230769231e-05, "loss": 0.0166, "step": 3425 }, { "epoch": 5.01, "learning_rate": 4.2443589743589744e-05, "loss": 0.0152, "step": 3450 }, { "epoch": 5.01, "learning_rate": 4.2379487179487184e-05, "loss": 0.0161, "step": 3475 }, { "epoch": 5.01, "learning_rate": 4.2315384615384616e-05, "loss": 0.0144, "step": 3500 }, { "epoch": 5.01, "learning_rate": 4.2251282051282055e-05, "loss": 0.0179, "step": 3525 }, { "epoch": 5.01, "learning_rate": 4.218717948717949e-05, "loss": 0.0154, "step": 3550 }, { "epoch": 5.01, "learning_rate": 4.212307692307693e-05, "loss": 0.0146, "step": 3575 }, { "epoch": 5.01, "learning_rate": 4.205897435897436e-05, "loss": 0.0147, "step": 3600 }, { "epoch": 5.01, "eval_cer": 21.88254223652454, "eval_loss": 0.5774866938591003, "eval_runtime": 130.6083, "eval_samples_per_second": 7.656, "eval_steps_per_second": 0.482, "step": 3600 }, { "epoch": 5.02, "learning_rate": 4.19948717948718e-05, "loss": 0.0155, "step": 3625 }, { "epoch": 5.02, "learning_rate": 4.193076923076924e-05, "loss": 0.0142, "step": 3650 }, { "epoch": 5.02, "learning_rate": 4.186666666666667e-05, "loss": 0.014, "step": 3675 }, { "epoch": 5.02, "learning_rate": 4.180256410256411e-05, "loss": 0.0122, "step": 3700 }, { "epoch": 5.02, "learning_rate": 4.173846153846154e-05, "loss": 0.0119, "step": 3725 }, { "epoch": 5.02, "learning_rate": 4.167435897435898e-05, "loss": 0.0117, "step": 3750 }, { "epoch": 5.02, "learning_rate": 4.161025641025641e-05, "loss": 0.0126, "step": 3775 }, { "epoch": 5.02, "learning_rate": 4.1546153846153846e-05, "loss": 0.0141, "step": 3800 }, { "epoch": 5.03, "learning_rate": 4.1482051282051285e-05, "loss": 0.012, "step": 3825 }, { "epoch": 5.03, "learning_rate": 4.141794871794872e-05, "loss": 0.0111, "step": 3850 }, { "epoch": 5.03, "learning_rate": 4.135384615384616e-05, "loss": 0.06, "step": 3875 }, { "epoch": 5.03, "learning_rate": 4.128974358974359e-05, "loss": 0.0784, "step": 3900 }, { "epoch": 5.03, "learning_rate": 4.122564102564103e-05, "loss": 0.0748, "step": 3925 }, { "epoch": 5.03, "learning_rate": 4.116153846153846e-05, "loss": 0.0475, "step": 3950 }, { "epoch": 5.03, "learning_rate": 4.10974358974359e-05, "loss": 0.0427, "step": 3975 }, { "epoch": 6.0, "learning_rate": 4.103333333333333e-05, "loss": 0.0119, "step": 4000 }, { "epoch": 6.0, "eval_cer": 20.461249664789488, "eval_loss": 0.49904364347457886, "eval_runtime": 131.4191, "eval_samples_per_second": 7.609, "eval_steps_per_second": 0.479, "step": 4000 }, { "epoch": 6.0, "learning_rate": 4.096923076923077e-05, "loss": 0.0115, "step": 4025 }, { "epoch": 6.0, "learning_rate": 4.0905128205128204e-05, "loss": 0.0107, "step": 4050 }, { "epoch": 6.0, "learning_rate": 4.084102564102564e-05, "loss": 0.0108, "step": 4075 }, { "epoch": 6.01, "learning_rate": 4.077692307692308e-05, "loss": 0.0107, "step": 4100 }, { "epoch": 6.01, "learning_rate": 4.0712820512820515e-05, "loss": 0.0091, "step": 4125 }, { "epoch": 6.01, "learning_rate": 4.0648717948717954e-05, "loss": 0.0114, "step": 4150 }, { "epoch": 6.01, "learning_rate": 4.0584615384615386e-05, "loss": 0.0102, "step": 4175 }, { "epoch": 6.01, "learning_rate": 4.0520512820512826e-05, "loss": 0.0119, "step": 4200 }, { "epoch": 6.01, "learning_rate": 4.045897435897436e-05, "loss": 0.0109, "step": 4225 }, { "epoch": 6.01, "learning_rate": 4.03948717948718e-05, "loss": 0.011, "step": 4250 }, { "epoch": 6.01, "learning_rate": 4.033076923076923e-05, "loss": 0.0098, "step": 4275 }, { "epoch": 6.02, "learning_rate": 4.026666666666667e-05, "loss": 0.0114, "step": 4300 }, { "epoch": 6.02, "learning_rate": 4.0202564102564104e-05, "loss": 0.0097, "step": 4325 }, { "epoch": 6.02, "learning_rate": 4.0138461538461544e-05, "loss": 0.0098, "step": 4350 }, { "epoch": 6.02, "learning_rate": 4.0074358974358976e-05, "loss": 0.01, "step": 4375 }, { "epoch": 6.02, "learning_rate": 4.0010256410256415e-05, "loss": 0.0088, "step": 4400 }, { "epoch": 6.02, "eval_cer": 22.088138017341556, "eval_loss": 0.5657151341438293, "eval_runtime": 129.1878, "eval_samples_per_second": 7.741, "eval_steps_per_second": 0.488, "step": 4400 }, { "epoch": 6.02, "learning_rate": 3.994615384615385e-05, "loss": 0.0089, "step": 4425 }, { "epoch": 6.02, "learning_rate": 3.988205128205129e-05, "loss": 0.01, "step": 4450 }, { "epoch": 6.03, "learning_rate": 3.981794871794872e-05, "loss": 0.0089, "step": 4475 }, { "epoch": 6.03, "learning_rate": 3.975384615384616e-05, "loss": 0.0099, "step": 4500 }, { "epoch": 6.03, "learning_rate": 3.96897435897436e-05, "loss": 0.0079, "step": 4525 }, { "epoch": 6.03, "learning_rate": 3.962564102564102e-05, "loss": 0.0596, "step": 4550 }, { "epoch": 6.03, "learning_rate": 3.956153846153846e-05, "loss": 0.0518, "step": 4575 }, { "epoch": 6.03, "learning_rate": 3.9497435897435895e-05, "loss": 0.0536, "step": 4600 }, { "epoch": 6.03, "learning_rate": 3.9433333333333334e-05, "loss": 0.0309, "step": 4625 }, { "epoch": 7.0, "learning_rate": 3.9369230769230767e-05, "loss": 0.0239, "step": 4650 }, { "epoch": 7.0, "learning_rate": 3.9305128205128206e-05, "loss": 0.0096, "step": 4675 }, { "epoch": 7.0, "learning_rate": 3.9241025641025645e-05, "loss": 0.0085, "step": 4700 }, { "epoch": 7.0, "learning_rate": 3.917692307692308e-05, "loss": 0.0075, "step": 4725 }, { "epoch": 7.01, "learning_rate": 3.9112820512820517e-05, "loss": 0.0083, "step": 4750 }, { "epoch": 7.01, "learning_rate": 3.904871794871795e-05, "loss": 0.0084, "step": 4775 }, { "epoch": 7.01, "learning_rate": 3.898461538461539e-05, "loss": 0.0081, "step": 4800 }, { "epoch": 7.01, "eval_cer": 20.6042728166622, "eval_loss": 0.5471253395080566, "eval_runtime": 129.9689, "eval_samples_per_second": 7.694, "eval_steps_per_second": 0.485, "step": 4800 }, { "epoch": 7.01, "learning_rate": 3.892051282051282e-05, "loss": 0.007, "step": 4825 }, { "epoch": 7.01, "learning_rate": 3.885641025641026e-05, "loss": 0.0099, "step": 4850 }, { "epoch": 7.01, "learning_rate": 3.879230769230769e-05, "loss": 0.0084, "step": 4875 }, { "epoch": 7.01, "learning_rate": 3.872820512820513e-05, "loss": 0.0087, "step": 4900 }, { "epoch": 7.01, "learning_rate": 3.8664102564102564e-05, "loss": 0.0072, "step": 4925 }, { "epoch": 7.02, "learning_rate": 3.86e-05, "loss": 0.008, "step": 4950 }, { "epoch": 7.02, "learning_rate": 3.853589743589744e-05, "loss": 0.0074, "step": 4975 }, { "epoch": 7.02, "learning_rate": 3.8471794871794875e-05, "loss": 0.0084, "step": 5000 }, { "epoch": 7.02, "learning_rate": 3.8407692307692314e-05, "loss": 0.009, "step": 5025 }, { "epoch": 7.02, "learning_rate": 3.8343589743589746e-05, "loss": 0.0078, "step": 5050 }, { "epoch": 7.02, "learning_rate": 3.8279487179487186e-05, "loss": 0.0088, "step": 5075 }, { "epoch": 7.02, "learning_rate": 3.821538461538462e-05, "loss": 0.0075, "step": 5100 }, { "epoch": 7.02, "learning_rate": 3.815128205128206e-05, "loss": 0.0076, "step": 5125 }, { "epoch": 7.03, "learning_rate": 3.808717948717948e-05, "loss": 0.0073, "step": 5150 }, { "epoch": 7.03, "learning_rate": 3.802307692307692e-05, "loss": 0.0072, "step": 5175 }, { "epoch": 7.03, "learning_rate": 3.795897435897436e-05, "loss": 0.029, "step": 5200 }, { "epoch": 7.03, "eval_cer": 23.035666398498257, "eval_loss": 0.4823973476886749, "eval_runtime": 133.963, "eval_samples_per_second": 7.465, "eval_steps_per_second": 0.47, "step": 5200 }, { "epoch": 7.03, "learning_rate": 3.7894871794871794e-05, "loss": 0.0431, "step": 5225 }, { "epoch": 7.03, "learning_rate": 3.783076923076923e-05, "loss": 0.0445, "step": 5250 }, { "epoch": 7.03, "learning_rate": 3.7766666666666665e-05, "loss": 0.0228, "step": 5275 }, { "epoch": 7.03, "learning_rate": 3.7702564102564105e-05, "loss": 0.0241, "step": 5300 }, { "epoch": 8.0, "learning_rate": 3.763846153846154e-05, "loss": 0.0082, "step": 5325 }, { "epoch": 8.0, "learning_rate": 3.7574358974358976e-05, "loss": 0.0071, "step": 5350 }, { "epoch": 8.0, "learning_rate": 3.751025641025641e-05, "loss": 0.0063, "step": 5375 }, { "epoch": 8.01, "learning_rate": 3.744615384615385e-05, "loss": 0.0067, "step": 5400 }, { "epoch": 8.01, "learning_rate": 3.738205128205128e-05, "loss": 0.0066, "step": 5425 }, { "epoch": 8.01, "learning_rate": 3.731794871794872e-05, "loss": 0.0058, "step": 5450 }, { "epoch": 8.01, "learning_rate": 3.725384615384616e-05, "loss": 0.0066, "step": 5475 }, { "epoch": 8.01, "learning_rate": 3.718974358974359e-05, "loss": 0.0059, "step": 5500 }, { "epoch": 8.01, "learning_rate": 3.712564102564103e-05, "loss": 0.0076, "step": 5525 }, { "epoch": 8.01, "learning_rate": 3.706153846153846e-05, "loss": 0.0076, "step": 5550 }, { "epoch": 8.01, "learning_rate": 3.69974358974359e-05, "loss": 0.0068, "step": 5575 }, { "epoch": 8.02, "learning_rate": 3.6933333333333334e-05, "loss": 0.0051, "step": 5600 }, { "epoch": 8.02, "eval_cer": 19.95172968624296, "eval_loss": 0.5466642379760742, "eval_runtime": 124.677, "eval_samples_per_second": 8.021, "eval_steps_per_second": 0.505, "step": 5600 }, { "epoch": 8.02, "learning_rate": 3.6871794871794877e-05, "loss": 0.0059, "step": 5625 }, { "epoch": 8.02, "learning_rate": 3.680769230769231e-05, "loss": 0.0052, "step": 5650 }, { "epoch": 8.02, "learning_rate": 3.674358974358975e-05, "loss": 0.0062, "step": 5675 }, { "epoch": 8.02, "learning_rate": 3.667948717948718e-05, "loss": 0.0061, "step": 5700 }, { "epoch": 8.02, "learning_rate": 3.661538461538462e-05, "loss": 0.0066, "step": 5725 }, { "epoch": 8.02, "learning_rate": 3.655128205128205e-05, "loss": 0.0053, "step": 5750 }, { "epoch": 8.02, "learning_rate": 3.648717948717949e-05, "loss": 0.0066, "step": 5775 }, { "epoch": 8.03, "learning_rate": 3.6423076923076924e-05, "loss": 0.0057, "step": 5800 }, { "epoch": 8.03, "learning_rate": 3.635897435897436e-05, "loss": 0.0074, "step": 5825 }, { "epoch": 8.03, "learning_rate": 3.6294871794871795e-05, "loss": 0.0059, "step": 5850 }, { "epoch": 8.03, "learning_rate": 3.6230769230769235e-05, "loss": 0.035, "step": 5875 }, { "epoch": 8.03, "learning_rate": 3.6166666666666674e-05, "loss": 0.037, "step": 5900 }, { "epoch": 8.03, "learning_rate": 3.6102564102564106e-05, "loss": 0.0253, "step": 5925 }, { "epoch": 8.03, "learning_rate": 3.603846153846154e-05, "loss": 0.0147, "step": 5950 }, { "epoch": 9.0, "learning_rate": 3.597435897435897e-05, "loss": 0.0148, "step": 5975 }, { "epoch": 9.0, "learning_rate": 3.591025641025641e-05, "loss": 0.0071, "step": 6000 }, { "epoch": 9.0, "eval_cer": 18.557253955484043, "eval_loss": 0.4980410635471344, "eval_runtime": 126.109, "eval_samples_per_second": 7.93, "eval_steps_per_second": 0.5, "step": 6000 }, { "epoch": 9.0, "learning_rate": 3.584615384615384e-05, "loss": 0.0056, "step": 6025 }, { "epoch": 9.0, "learning_rate": 3.578205128205128e-05, "loss": 0.0049, "step": 6050 }, { "epoch": 9.01, "learning_rate": 3.571794871794872e-05, "loss": 0.0053, "step": 6075 }, { "epoch": 9.01, "learning_rate": 3.5653846153846154e-05, "loss": 0.0046, "step": 6100 }, { "epoch": 9.01, "learning_rate": 3.558974358974359e-05, "loss": 0.0047, "step": 6125 }, { "epoch": 9.01, "learning_rate": 3.5525641025641025e-05, "loss": 0.0046, "step": 6150 }, { "epoch": 9.01, "learning_rate": 3.5461538461538464e-05, "loss": 0.0063, "step": 6175 }, { "epoch": 9.01, "learning_rate": 3.53974358974359e-05, "loss": 0.0056, "step": 6200 }, { "epoch": 9.01, "learning_rate": 3.5333333333333336e-05, "loss": 0.006, "step": 6225 }, { "epoch": 9.01, "learning_rate": 3.526923076923077e-05, "loss": 0.005, "step": 6250 }, { "epoch": 9.02, "learning_rate": 3.520512820512821e-05, "loss": 0.0046, "step": 6275 }, { "epoch": 9.02, "learning_rate": 3.514102564102564e-05, "loss": 0.0047, "step": 6300 }, { "epoch": 9.02, "learning_rate": 3.507692307692308e-05, "loss": 0.0049, "step": 6325 }, { "epoch": 9.02, "learning_rate": 3.501282051282052e-05, "loss": 0.0048, "step": 6350 }, { "epoch": 9.02, "learning_rate": 3.494871794871795e-05, "loss": 0.0045, "step": 6375 }, { "epoch": 9.02, "learning_rate": 3.488461538461539e-05, "loss": 0.0046, "step": 6400 }, { "epoch": 9.02, "eval_cer": 20.872441226423526, "eval_loss": 0.5338811278343201, "eval_runtime": 125.9373, "eval_samples_per_second": 7.94, "eval_steps_per_second": 0.5, "step": 6400 }, { "epoch": 9.02, "learning_rate": 3.482051282051282e-05, "loss": 0.0039, "step": 6425 }, { "epoch": 9.02, "learning_rate": 3.475641025641026e-05, "loss": 0.0042, "step": 6450 }, { "epoch": 9.03, "learning_rate": 3.4692307692307694e-05, "loss": 0.0048, "step": 6475 }, { "epoch": 9.03, "learning_rate": 3.4628205128205133e-05, "loss": 0.0064, "step": 6500 }, { "epoch": 9.03, "learning_rate": 3.4564102564102566e-05, "loss": 0.0158, "step": 6525 }, { "epoch": 9.03, "learning_rate": 3.45e-05, "loss": 0.0284, "step": 6550 }, { "epoch": 9.03, "learning_rate": 3.443589743589744e-05, "loss": 0.0272, "step": 6575 }, { "epoch": 9.03, "learning_rate": 3.437179487179487e-05, "loss": 0.0141, "step": 6600 }, { "epoch": 9.03, "learning_rate": 3.430769230769231e-05, "loss": 0.0147, "step": 6625 }, { "epoch": 10.0, "learning_rate": 3.424358974358974e-05, "loss": 0.0062, "step": 6650 }, { "epoch": 10.0, "learning_rate": 3.417948717948718e-05, "loss": 0.0068, "step": 6675 }, { "epoch": 10.0, "learning_rate": 3.411538461538461e-05, "loss": 0.0047, "step": 6700 }, { "epoch": 10.01, "learning_rate": 3.405128205128205e-05, "loss": 0.0041, "step": 6725 }, { "epoch": 10.01, "learning_rate": 3.3987179487179485e-05, "loss": 0.0041, "step": 6750 }, { "epoch": 10.01, "learning_rate": 3.3923076923076924e-05, "loss": 0.0043, "step": 6775 }, { "epoch": 10.01, "learning_rate": 3.385897435897436e-05, "loss": 0.0045, "step": 6800 }, { "epoch": 10.01, "eval_cer": 18.432108697595424, "eval_loss": 0.5061790943145752, "eval_runtime": 125.241, "eval_samples_per_second": 7.985, "eval_steps_per_second": 0.503, "step": 6800 }, { "epoch": 10.01, "learning_rate": 3.3794871794871796e-05, "loss": 0.0046, "step": 6825 }, { "epoch": 10.01, "learning_rate": 3.3730769230769235e-05, "loss": 0.0052, "step": 6850 }, { "epoch": 10.01, "learning_rate": 3.366666666666667e-05, "loss": 0.0055, "step": 6875 }, { "epoch": 10.01, "learning_rate": 3.3602564102564107e-05, "loss": 0.0053, "step": 6900 }, { "epoch": 10.02, "learning_rate": 3.353846153846154e-05, "loss": 0.0049, "step": 6925 }, { "epoch": 10.02, "learning_rate": 3.347435897435898e-05, "loss": 0.0052, "step": 6950 }, { "epoch": 10.02, "learning_rate": 3.341025641025641e-05, "loss": 0.0044, "step": 6975 }, { "epoch": 10.02, "learning_rate": 3.334615384615385e-05, "loss": 0.0046, "step": 7000 }, { "epoch": 10.02, "learning_rate": 3.328205128205128e-05, "loss": 0.0044, "step": 7025 }, { "epoch": 10.02, "learning_rate": 3.321794871794872e-05, "loss": 0.0053, "step": 7050 }, { "epoch": 10.02, "learning_rate": 3.315384615384616e-05, "loss": 0.0038, "step": 7075 }, { "epoch": 10.02, "learning_rate": 3.308974358974359e-05, "loss": 0.0039, "step": 7100 }, { "epoch": 10.03, "learning_rate": 3.302564102564103e-05, "loss": 0.0049, "step": 7125 }, { "epoch": 10.03, "learning_rate": 3.296153846153846e-05, "loss": 0.0056, "step": 7150 }, { "epoch": 10.03, "learning_rate": 3.28974358974359e-05, "loss": 0.0051, "step": 7175 }, { "epoch": 10.03, "learning_rate": 3.283333333333333e-05, "loss": 0.0186, "step": 7200 }, { "epoch": 10.03, "eval_cer": 18.128184499865917, "eval_loss": 0.47741541266441345, "eval_runtime": 124.6703, "eval_samples_per_second": 8.021, "eval_steps_per_second": 0.505, "step": 7200 } ], "logging_steps": 25, "max_steps": 20000, "num_train_epochs": 9223372036854775807, "save_steps": 800, "total_flos": 5.97730266611712e+19, "trial_name": null, "trial_params": null }