{ "best_metric": 0.23875188827514648, "best_model_checkpoint": "./checkpoint-13500", "epoch": 2.0, "global_step": 13822, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.533333333333333e-06, "loss": 17.2403, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.32e-05, "loss": 10.2311, "step": 200 }, { "epoch": 0.04, "learning_rate": 1.9800000000000004e-05, "loss": 7.834, "step": 300 }, { "epoch": 0.06, "learning_rate": 2.646666666666667e-05, "loss": 6.0656, "step": 400 }, { "epoch": 0.07, "learning_rate": 3.313333333333333e-05, "loss": 4.3748, "step": 500 }, { "epoch": 0.07, "eval_loss": 3.878422975540161, "eval_runtime": 285.8223, "eval_samples_per_second": 20.264, "eval_steps_per_second": 0.318, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.09, "learning_rate": 3.9800000000000005e-05, "loss": 3.2923, "step": 600 }, { "epoch": 0.1, "learning_rate": 4.646666666666667e-05, "loss": 2.9475, "step": 700 }, { "epoch": 0.12, "learning_rate": 5.3133333333333335e-05, "loss": 2.8639, "step": 800 }, { "epoch": 0.13, "learning_rate": 5.9800000000000003e-05, "loss": 2.8265, "step": 900 }, { "epoch": 0.14, "learning_rate": 6.646666666666667e-05, "loss": 2.8068, "step": 1000 }, { "epoch": 0.14, "eval_loss": 2.828850746154785, "eval_runtime": 292.3877, "eval_samples_per_second": 19.809, "eval_steps_per_second": 0.311, "eval_wer": 0.9826485059793412, "step": 1000 }, { "epoch": 0.16, "learning_rate": 7.306666666666668e-05, "loss": 2.779, "step": 1100 }, { "epoch": 0.17, "learning_rate": 7.973333333333334e-05, "loss": 2.6402, "step": 1200 }, { "epoch": 0.19, "learning_rate": 8.64e-05, "loss": 2.1119, "step": 1300 }, { "epoch": 0.2, "learning_rate": 9.306666666666667e-05, "loss": 1.7965, "step": 1400 }, { "epoch": 0.22, "learning_rate": 9.973333333333334e-05, "loss": 1.6698, "step": 1500 }, { "epoch": 0.22, "eval_loss": 0.881136417388916, "eval_runtime": 297.1806, "eval_samples_per_second": 19.49, "eval_steps_per_second": 0.306, "eval_wer": 0.7127472384241911, "step": 1500 }, { "epoch": 0.23, "learning_rate": 9.92209056971271e-05, "loss": 1.5882, "step": 1600 }, { "epoch": 0.25, "learning_rate": 9.840934913163448e-05, "loss": 1.5172, "step": 1700 }, { "epoch": 0.26, "learning_rate": 9.759779256614186e-05, "loss": 1.4579, "step": 1800 }, { "epoch": 0.27, "learning_rate": 9.678623600064926e-05, "loss": 1.3829, "step": 1900 }, { "epoch": 0.29, "learning_rate": 9.597467943515663e-05, "loss": 1.3488, "step": 2000 }, { "epoch": 0.29, "eval_loss": 0.516592800617218, "eval_runtime": 301.2842, "eval_samples_per_second": 19.224, "eval_steps_per_second": 0.302, "eval_wer": 0.5369024731988661, "step": 2000 }, { "epoch": 0.3, "learning_rate": 9.516312286966402e-05, "loss": 1.2981, "step": 2100 }, { "epoch": 0.32, "learning_rate": 9.43515663041714e-05, "loss": 1.2845, "step": 2200 }, { "epoch": 0.33, "learning_rate": 9.354000973867879e-05, "loss": 1.2459, "step": 2300 }, { "epoch": 0.35, "learning_rate": 9.272845317318618e-05, "loss": 1.2255, "step": 2400 }, { "epoch": 0.36, "learning_rate": 9.191689660769356e-05, "loss": 1.2239, "step": 2500 }, { "epoch": 0.36, "eval_loss": 0.4104757010936737, "eval_runtime": 299.1395, "eval_samples_per_second": 19.362, "eval_steps_per_second": 0.304, "eval_wer": 0.474111245071524, "step": 2500 }, { "epoch": 0.38, "learning_rate": 9.110534004220094e-05, "loss": 1.2024, "step": 2600 }, { "epoch": 0.39, "learning_rate": 9.030189904236326e-05, "loss": 1.1851, "step": 2700 }, { "epoch": 0.41, "learning_rate": 8.949034247687063e-05, "loss": 1.1768, "step": 2800 }, { "epoch": 0.42, "learning_rate": 8.867878591137803e-05, "loss": 1.1641, "step": 2900 }, { "epoch": 0.43, "learning_rate": 8.786722934588541e-05, "loss": 1.1537, "step": 3000 }, { "epoch": 0.43, "eval_loss": 0.35850802063941956, "eval_runtime": 299.662, "eval_samples_per_second": 19.328, "eval_steps_per_second": 0.304, "eval_wer": 0.4448499462348073, "step": 3000 }, { "epoch": 0.45, "learning_rate": 8.70556727803928e-05, "loss": 1.1449, "step": 3100 }, { "epoch": 0.46, "learning_rate": 8.624411621490018e-05, "loss": 1.1379, "step": 3200 }, { "epoch": 0.48, "learning_rate": 8.543255964940758e-05, "loss": 1.1331, "step": 3300 }, { "epoch": 0.49, "learning_rate": 8.462100308391495e-05, "loss": 1.1205, "step": 3400 }, { "epoch": 0.51, "learning_rate": 8.380944651842234e-05, "loss": 1.1184, "step": 3500 }, { "epoch": 0.51, "eval_loss": 0.333638072013855, "eval_runtime": 297.0402, "eval_samples_per_second": 19.499, "eval_steps_per_second": 0.306, "eval_wer": 0.42922545537489004, "step": 3500 }, { "epoch": 0.52, "learning_rate": 8.299788995292971e-05, "loss": 1.1014, "step": 3600 }, { "epoch": 0.54, "learning_rate": 8.218633338743711e-05, "loss": 1.1114, "step": 3700 }, { "epoch": 0.55, "learning_rate": 8.13747768219445e-05, "loss": 1.117, "step": 3800 }, { "epoch": 0.56, "learning_rate": 8.056322025645188e-05, "loss": 1.102, "step": 3900 }, { "epoch": 0.58, "learning_rate": 7.975166369095926e-05, "loss": 1.0968, "step": 4000 }, { "epoch": 0.58, "eval_loss": 0.31949570775032043, "eval_runtime": 296.0172, "eval_samples_per_second": 19.566, "eval_steps_per_second": 0.307, "eval_wer": 0.4180162273127179, "step": 4000 }, { "epoch": 0.59, "learning_rate": 7.894822269112158e-05, "loss": 1.0942, "step": 4100 }, { "epoch": 0.61, "learning_rate": 7.813666612562897e-05, "loss": 1.0859, "step": 4200 }, { "epoch": 0.62, "learning_rate": 7.732510956013635e-05, "loss": 1.0767, "step": 4300 }, { "epoch": 0.64, "learning_rate": 7.652166856029866e-05, "loss": 1.0766, "step": 4400 }, { "epoch": 0.65, "learning_rate": 7.571011199480604e-05, "loss": 1.0737, "step": 4500 }, { "epoch": 0.65, "eval_loss": 0.30754634737968445, "eval_runtime": 296.2378, "eval_samples_per_second": 19.552, "eval_steps_per_second": 0.307, "eval_wer": 0.41408973899442797, "step": 4500 }, { "epoch": 0.67, "learning_rate": 7.489855542931342e-05, "loss": 1.0807, "step": 4600 }, { "epoch": 0.68, "learning_rate": 7.40869988638208e-05, "loss": 1.071, "step": 4700 }, { "epoch": 0.69, "learning_rate": 7.32754422983282e-05, "loss": 1.0613, "step": 4800 }, { "epoch": 0.71, "learning_rate": 7.246388573283557e-05, "loss": 1.0635, "step": 4900 }, { "epoch": 0.72, "learning_rate": 7.165232916734297e-05, "loss": 1.0677, "step": 5000 }, { "epoch": 0.72, "eval_loss": 0.30150118470191956, "eval_runtime": 297.4742, "eval_samples_per_second": 19.471, "eval_steps_per_second": 0.306, "eval_wer": 0.4089250219948516, "step": 5000 }, { "epoch": 0.74, "learning_rate": 7.084077260185034e-05, "loss": 1.0707, "step": 5100 }, { "epoch": 0.75, "learning_rate": 7.002921603635774e-05, "loss": 1.0617, "step": 5200 }, { "epoch": 0.77, "learning_rate": 6.921765947086512e-05, "loss": 1.0566, "step": 5300 }, { "epoch": 0.78, "learning_rate": 6.84061029053725e-05, "loss": 1.0518, "step": 5400 }, { "epoch": 0.8, "learning_rate": 6.760266190553481e-05, "loss": 1.0462, "step": 5500 }, { "epoch": 0.8, "eval_loss": 0.297052800655365, "eval_runtime": 296.1592, "eval_samples_per_second": 19.557, "eval_steps_per_second": 0.307, "eval_wer": 0.4077193782788621, "step": 5500 }, { "epoch": 0.81, "learning_rate": 6.679110534004221e-05, "loss": 1.0514, "step": 5600 }, { "epoch": 0.82, "learning_rate": 6.597954877454959e-05, "loss": 1.0446, "step": 5700 }, { "epoch": 0.84, "learning_rate": 6.516799220905698e-05, "loss": 1.0358, "step": 5800 }, { "epoch": 0.85, "learning_rate": 6.435643564356436e-05, "loss": 1.0364, "step": 5900 }, { "epoch": 0.87, "learning_rate": 6.354487907807174e-05, "loss": 1.0392, "step": 6000 }, { "epoch": 0.87, "eval_loss": 0.2870033383369446, "eval_runtime": 295.9814, "eval_samples_per_second": 19.569, "eval_steps_per_second": 0.307, "eval_wer": 0.3997034768157972, "step": 6000 }, { "epoch": 0.88, "learning_rate": 6.273332251257913e-05, "loss": 1.0375, "step": 6100 }, { "epoch": 0.9, "learning_rate": 6.192176594708652e-05, "loss": 1.0408, "step": 6200 }, { "epoch": 0.91, "learning_rate": 6.11102093815939e-05, "loss": 1.0382, "step": 6300 }, { "epoch": 0.93, "learning_rate": 6.0298652816101284e-05, "loss": 1.0335, "step": 6400 }, { "epoch": 0.94, "learning_rate": 5.948709625060867e-05, "loss": 1.0178, "step": 6500 }, { "epoch": 0.94, "eval_loss": 0.28046590089797974, "eval_runtime": 297.8045, "eval_samples_per_second": 19.449, "eval_steps_per_second": 0.306, "eval_wer": 0.39629834794225943, "step": 6500 }, { "epoch": 0.95, "learning_rate": 5.867553968511605e-05, "loss": 1.0312, "step": 6600 }, { "epoch": 0.97, "learning_rate": 5.786398311962344e-05, "loss": 1.033, "step": 6700 }, { "epoch": 0.98, "learning_rate": 5.7052426554130825e-05, "loss": 1.0289, "step": 6800 }, { "epoch": 1.0, "learning_rate": 5.624086998863821e-05, "loss": 1.027, "step": 6900 }, { "epoch": 1.01, "learning_rate": 5.54293134231456e-05, "loss": 0.992, "step": 7000 }, { "epoch": 1.01, "eval_loss": 0.2747785747051239, "eval_runtime": 298.4841, "eval_samples_per_second": 19.405, "eval_steps_per_second": 0.305, "eval_wer": 0.39352862589201343, "step": 7000 }, { "epoch": 1.03, "learning_rate": 5.461775685765298e-05, "loss": 1.0025, "step": 7100 }, { "epoch": 1.04, "learning_rate": 5.3806200292160366e-05, "loss": 1.0122, "step": 7200 }, { "epoch": 1.06, "learning_rate": 5.299464372666775e-05, "loss": 1.018, "step": 7300 }, { "epoch": 1.07, "learning_rate": 5.218308716117514e-05, "loss": 0.9936, "step": 7400 }, { "epoch": 1.09, "learning_rate": 5.137153059568252e-05, "loss": 1.0197, "step": 7500 }, { "epoch": 1.09, "eval_loss": 0.26907604932785034, "eval_runtime": 298.796, "eval_samples_per_second": 19.384, "eval_steps_per_second": 0.305, "eval_wer": 0.3884453713056796, "step": 7500 }, { "epoch": 1.1, "learning_rate": 5.055997403018991e-05, "loss": 1.008, "step": 7600 }, { "epoch": 1.11, "learning_rate": 4.97484174646973e-05, "loss": 1.0028, "step": 7700 }, { "epoch": 1.13, "learning_rate": 4.893686089920468e-05, "loss": 0.9929, "step": 7800 }, { "epoch": 1.14, "learning_rate": 4.8125304333712064e-05, "loss": 0.995, "step": 7900 }, { "epoch": 1.16, "learning_rate": 4.731374776821945e-05, "loss": 1.0056, "step": 8000 }, { "epoch": 1.16, "eval_loss": 0.26817116141319275, "eval_runtime": 298.1504, "eval_samples_per_second": 19.426, "eval_steps_per_second": 0.305, "eval_wer": 0.3888526833718922, "step": 8000 }, { "epoch": 1.17, "learning_rate": 4.6510306768381754e-05, "loss": 0.9971, "step": 8100 }, { "epoch": 1.19, "learning_rate": 4.5698750202889145e-05, "loss": 0.9976, "step": 8200 }, { "epoch": 1.2, "learning_rate": 4.488719363739653e-05, "loss": 1.0014, "step": 8300 }, { "epoch": 1.22, "learning_rate": 4.407563707190391e-05, "loss": 0.9835, "step": 8400 }, { "epoch": 1.23, "learning_rate": 4.3272196072066225e-05, "loss": 0.9826, "step": 8500 }, { "epoch": 1.23, "eval_loss": 0.26473307609558105, "eval_runtime": 299.0161, "eval_samples_per_second": 19.37, "eval_steps_per_second": 0.304, "eval_wer": 0.38675095311023494, "step": 8500 }, { "epoch": 1.24, "learning_rate": 4.2460639506573615e-05, "loss": 0.9838, "step": 8600 }, { "epoch": 1.26, "learning_rate": 4.1649082941081e-05, "loss": 0.9836, "step": 8700 }, { "epoch": 1.27, "learning_rate": 4.0845641941243305e-05, "loss": 0.9824, "step": 8800 }, { "epoch": 1.29, "learning_rate": 4.003408537575069e-05, "loss": 0.9715, "step": 8900 }, { "epoch": 1.3, "learning_rate": 3.922252881025807e-05, "loss": 0.9815, "step": 9000 }, { "epoch": 1.3, "eval_loss": 0.26034271717071533, "eval_runtime": 299.6782, "eval_samples_per_second": 19.327, "eval_steps_per_second": 0.304, "eval_wer": 0.3831828994102121, "step": 9000 }, { "epoch": 1.32, "learning_rate": 3.841097224476546e-05, "loss": 0.9757, "step": 9100 }, { "epoch": 1.33, "learning_rate": 3.7599415679272846e-05, "loss": 0.9689, "step": 9200 }, { "epoch": 1.35, "learning_rate": 3.678785911378023e-05, "loss": 0.9778, "step": 9300 }, { "epoch": 1.36, "learning_rate": 3.5976302548287614e-05, "loss": 0.9794, "step": 9400 }, { "epoch": 1.37, "learning_rate": 3.5164745982795e-05, "loss": 0.9717, "step": 9500 }, { "epoch": 1.37, "eval_loss": 0.25609487295150757, "eval_runtime": 299.6976, "eval_samples_per_second": 19.326, "eval_steps_per_second": 0.304, "eval_wer": 0.3807064420476392, "step": 9500 }, { "epoch": 1.39, "learning_rate": 3.435318941730239e-05, "loss": 0.9752, "step": 9600 }, { "epoch": 1.4, "learning_rate": 3.354163285180977e-05, "loss": 0.965, "step": 9700 }, { "epoch": 1.42, "learning_rate": 3.2730076286317155e-05, "loss": 0.9522, "step": 9800 }, { "epoch": 1.43, "learning_rate": 3.191851972082454e-05, "loss": 0.9718, "step": 9900 }, { "epoch": 1.45, "learning_rate": 3.110696315533193e-05, "loss": 0.9605, "step": 10000 }, { "epoch": 1.45, "eval_loss": 0.25231894850730896, "eval_runtime": 297.5796, "eval_samples_per_second": 19.464, "eval_steps_per_second": 0.306, "eval_wer": 0.3782951546156603, "step": 10000 }, { "epoch": 1.46, "learning_rate": 3.0295406589839315e-05, "loss": 0.9635, "step": 10100 }, { "epoch": 1.48, "learning_rate": 2.94838500243467e-05, "loss": 0.9632, "step": 10200 }, { "epoch": 1.49, "learning_rate": 2.8672293458854082e-05, "loss": 0.9548, "step": 10300 }, { "epoch": 1.5, "learning_rate": 2.7868852459016392e-05, "loss": 0.9554, "step": 10400 }, { "epoch": 1.52, "learning_rate": 2.7057295893523783e-05, "loss": 0.96, "step": 10500 }, { "epoch": 1.52, "eval_loss": 0.24939315021038055, "eval_runtime": 300.036, "eval_samples_per_second": 19.304, "eval_steps_per_second": 0.303, "eval_wer": 0.3787513441298185, "step": 10500 }, { "epoch": 1.53, "learning_rate": 2.6245739328031166e-05, "loss": 0.9611, "step": 10600 }, { "epoch": 1.55, "learning_rate": 2.543418276253855e-05, "loss": 0.9594, "step": 10700 }, { "epoch": 1.56, "learning_rate": 2.4622626197045937e-05, "loss": 0.9589, "step": 10800 }, { "epoch": 1.58, "learning_rate": 2.381106963155332e-05, "loss": 0.9441, "step": 10900 }, { "epoch": 1.59, "learning_rate": 2.2999513066060704e-05, "loss": 0.9442, "step": 11000 }, { "epoch": 1.59, "eval_loss": 0.24783751368522644, "eval_runtime": 297.7741, "eval_samples_per_second": 19.451, "eval_steps_per_second": 0.306, "eval_wer": 0.3760142070448695, "step": 11000 }, { "epoch": 1.61, "learning_rate": 2.218795650056809e-05, "loss": 0.9496, "step": 11100 }, { "epoch": 1.62, "learning_rate": 2.1376399935075474e-05, "loss": 0.9486, "step": 11200 }, { "epoch": 1.64, "learning_rate": 2.056484336958286e-05, "loss": 0.9558, "step": 11300 }, { "epoch": 1.65, "learning_rate": 1.9753286804090245e-05, "loss": 0.9486, "step": 11400 }, { "epoch": 1.66, "learning_rate": 1.8941730238597632e-05, "loss": 0.9564, "step": 11500 }, { "epoch": 1.66, "eval_loss": 0.2454409897327423, "eval_runtime": 296.0265, "eval_samples_per_second": 19.566, "eval_steps_per_second": 0.307, "eval_wer": 0.3733096549252175, "step": 11500 }, { "epoch": 1.68, "learning_rate": 1.8130173673105015e-05, "loss": 0.9427, "step": 11600 }, { "epoch": 1.69, "learning_rate": 1.73186171076124e-05, "loss": 0.9423, "step": 11700 }, { "epoch": 1.71, "learning_rate": 1.6507060542119786e-05, "loss": 0.9503, "step": 11800 }, { "epoch": 1.72, "learning_rate": 1.5695503976627173e-05, "loss": 0.9383, "step": 11900 }, { "epoch": 1.74, "learning_rate": 1.4883947411134558e-05, "loss": 0.9436, "step": 12000 }, { "epoch": 1.74, "eval_loss": 0.24390804767608643, "eval_runtime": 295.4584, "eval_samples_per_second": 19.603, "eval_steps_per_second": 0.308, "eval_wer": 0.37467822346769203, "step": 12000 }, { "epoch": 1.75, "learning_rate": 1.4072390845641942e-05, "loss": 0.9491, "step": 12100 }, { "epoch": 1.77, "learning_rate": 1.3260834280149325e-05, "loss": 0.9419, "step": 12200 }, { "epoch": 1.78, "learning_rate": 1.2449277714656712e-05, "loss": 0.9517, "step": 12300 }, { "epoch": 1.79, "learning_rate": 1.1637721149164097e-05, "loss": 0.9367, "step": 12400 }, { "epoch": 1.81, "learning_rate": 1.0826164583671483e-05, "loss": 0.938, "step": 12500 }, { "epoch": 1.81, "eval_loss": 0.24111612141132355, "eval_runtime": 296.9314, "eval_samples_per_second": 19.506, "eval_steps_per_second": 0.306, "eval_wer": 0.37159894424712436, "step": 12500 }, { "epoch": 1.82, "learning_rate": 1.0014608018178868e-05, "loss": 0.9337, "step": 12600 }, { "epoch": 1.84, "learning_rate": 9.203051452686253e-06, "loss": 0.9284, "step": 12700 }, { "epoch": 1.85, "learning_rate": 8.391494887193638e-06, "loss": 0.938, "step": 12800 }, { "epoch": 1.87, "learning_rate": 7.579938321701023e-06, "loss": 0.9365, "step": 12900 }, { "epoch": 1.88, "learning_rate": 6.768381756208409e-06, "loss": 0.9353, "step": 13000 }, { "epoch": 1.88, "eval_loss": 0.23965783417224884, "eval_runtime": 296.6078, "eval_samples_per_second": 19.527, "eval_steps_per_second": 0.307, "eval_wer": 0.3697904786731402, "step": 13000 }, { "epoch": 1.9, "learning_rate": 5.956825190715793e-06, "loss": 0.9413, "step": 13100 }, { "epoch": 1.91, "learning_rate": 5.153384190878104e-06, "loss": 0.9356, "step": 13200 }, { "epoch": 1.92, "learning_rate": 4.34182762538549e-06, "loss": 0.9209, "step": 13300 }, { "epoch": 1.94, "learning_rate": 3.530271059892875e-06, "loss": 0.9362, "step": 13400 }, { "epoch": 1.95, "learning_rate": 2.71871449440026e-06, "loss": 0.9271, "step": 13500 }, { "epoch": 1.95, "eval_loss": 0.23875188827514648, "eval_runtime": 296.9414, "eval_samples_per_second": 19.506, "eval_steps_per_second": 0.306, "eval_wer": 0.3680797679950471, "step": 13500 }, { "epoch": 1.97, "learning_rate": 1.907157928907645e-06, "loss": 0.9288, "step": 13600 }, { "epoch": 1.98, "learning_rate": 1.0956013634150302e-06, "loss": 0.9345, "step": 13700 }, { "epoch": 2.0, "learning_rate": 2.840447979224152e-07, "loss": 0.9326, "step": 13800 }, { "epoch": 2.0, "step": 13822, "total_flos": 1.2600843645735263e+20, "train_loss": 1.442369053426242, "train_runtime": 53680.5392, "train_samples_per_second": 16.478, "train_steps_per_second": 0.257 } ], "max_steps": 13822, "num_train_epochs": 2, "total_flos": 1.2600843645735263e+20, "trial_name": null, "trial_params": null }