{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 2500, "global_step": 29096, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.230240549828179e-07, "loss": 19.3118, "step": 50 }, { "epoch": 0.03, "learning_rate": 6.597938144329897e-07, "loss": 20.4803, "step": 100 }, { "epoch": 0.04, "learning_rate": 1.0034364261168387e-06, "loss": 18.9059, "step": 150 }, { "epoch": 0.05, "learning_rate": 1.3470790378006874e-06, "loss": 18.7665, "step": 200 }, { "epoch": 0.07, "learning_rate": 1.6838487972508594e-06, "loss": 19.14, "step": 250 }, { "epoch": 0.08, "learning_rate": 2.027491408934708e-06, "loss": 17.1733, "step": 300 }, { "epoch": 0.1, "learning_rate": 2.3711340206185566e-06, "loss": 15.9423, "step": 350 }, { "epoch": 0.11, "learning_rate": 2.7147766323024053e-06, "loss": 14.486, "step": 400 }, { "epoch": 0.12, "learning_rate": 3.058419243986255e-06, "loss": 13.0871, "step": 450 }, { "epoch": 0.14, "learning_rate": 3.4020618556701037e-06, "loss": 12.2208, "step": 500 }, { "epoch": 0.15, "learning_rate": 3.7457044673539524e-06, "loss": 10.7599, "step": 550 }, { "epoch": 0.16, "learning_rate": 4.089347079037801e-06, "loss": 10.0171, "step": 600 }, { "epoch": 0.18, "learning_rate": 4.4329896907216494e-06, "loss": 9.0393, "step": 650 }, { "epoch": 0.19, "learning_rate": 4.776632302405499e-06, "loss": 8.3246, "step": 700 }, { "epoch": 0.21, "learning_rate": 5.120274914089347e-06, "loss": 7.904, "step": 750 }, { "epoch": 0.22, "learning_rate": 5.463917525773196e-06, "loss": 7.243, "step": 800 }, { "epoch": 0.23, "learning_rate": 5.807560137457045e-06, "loss": 6.8447, "step": 850 }, { "epoch": 0.25, "learning_rate": 6.151202749140894e-06, "loss": 6.3023, "step": 900 }, { "epoch": 0.26, "learning_rate": 6.494845360824743e-06, "loss": 5.8747, "step": 950 }, { "epoch": 0.27, "learning_rate": 6.8316151202749144e-06, "loss": 5.9247, "step": 1000 }, { "epoch": 0.29, "learning_rate": 7.175257731958763e-06, "loss": 5.337, "step": 1050 }, { "epoch": 0.3, "learning_rate": 7.518900343642612e-06, "loss": 5.0652, "step": 1100 }, { "epoch": 0.32, "learning_rate": 7.862542955326461e-06, "loss": 4.7917, "step": 1150 }, { "epoch": 0.33, "learning_rate": 8.20618556701031e-06, "loss": 4.7247, "step": 1200 }, { "epoch": 0.34, "learning_rate": 8.549828178694159e-06, "loss": 4.3759, "step": 1250 }, { "epoch": 0.36, "learning_rate": 8.893470790378007e-06, "loss": 4.1952, "step": 1300 }, { "epoch": 0.37, "learning_rate": 9.237113402061856e-06, "loss": 4.1534, "step": 1350 }, { "epoch": 0.38, "learning_rate": 9.580756013745706e-06, "loss": 3.9869, "step": 1400 }, { "epoch": 0.4, "learning_rate": 9.924398625429554e-06, "loss": 3.7676, "step": 1450 }, { "epoch": 0.41, "learning_rate": 1.0268041237113402e-05, "loss": 3.7434, "step": 1500 }, { "epoch": 0.43, "learning_rate": 1.0611683848797251e-05, "loss": 3.5925, "step": 1550 }, { "epoch": 0.44, "learning_rate": 1.0955326460481101e-05, "loss": 3.5214, "step": 1600 }, { "epoch": 0.45, "learning_rate": 1.1298969072164949e-05, "loss": 3.4825, "step": 1650 }, { "epoch": 0.47, "learning_rate": 1.1642611683848798e-05, "loss": 3.4556, "step": 1700 }, { "epoch": 0.48, "learning_rate": 1.1986254295532646e-05, "loss": 3.4135, "step": 1750 }, { "epoch": 0.49, "learning_rate": 1.2329896907216496e-05, "loss": 3.3999, "step": 1800 }, { "epoch": 0.51, "learning_rate": 1.2673539518900344e-05, "loss": 3.3894, "step": 1850 }, { "epoch": 0.52, "learning_rate": 1.3017182130584193e-05, "loss": 3.4133, "step": 1900 }, { "epoch": 0.54, "learning_rate": 1.3360824742268041e-05, "loss": 3.3643, "step": 1950 }, { "epoch": 0.55, "learning_rate": 1.370446735395189e-05, "loss": 3.3633, "step": 2000 }, { "epoch": 0.56, "learning_rate": 1.4048109965635739e-05, "loss": 3.3607, "step": 2050 }, { "epoch": 0.58, "learning_rate": 1.4391752577319588e-05, "loss": 3.3537, "step": 2100 }, { "epoch": 0.59, "learning_rate": 1.4735395189003438e-05, "loss": 3.3888, "step": 2150 }, { "epoch": 0.6, "learning_rate": 1.5079037800687286e-05, "loss": 3.3809, "step": 2200 }, { "epoch": 0.62, "learning_rate": 1.5422680412371135e-05, "loss": 3.3425, "step": 2250 }, { "epoch": 0.63, "learning_rate": 1.5766323024054985e-05, "loss": 3.3477, "step": 2300 }, { "epoch": 0.65, "learning_rate": 1.6109965635738835e-05, "loss": 3.3413, "step": 2350 }, { "epoch": 0.66, "learning_rate": 1.645360824742268e-05, "loss": 3.3374, "step": 2400 }, { "epoch": 0.67, "learning_rate": 1.679725085910653e-05, "loss": 3.3442, "step": 2450 }, { "epoch": 0.69, "learning_rate": 1.714089347079038e-05, "loss": 3.3362, "step": 2500 }, { "epoch": 0.69, "eval_loss": 3.5583269596099854, "eval_runtime": 6.611, "eval_samples_per_second": 214.795, "eval_steps_per_second": 13.462, "eval_wer": 1.0, "step": 2500 }, { "epoch": 0.7, "learning_rate": 1.748453608247423e-05, "loss": 3.3359, "step": 2550 }, { "epoch": 0.71, "learning_rate": 1.782817869415808e-05, "loss": 3.335, "step": 2600 }, { "epoch": 0.73, "learning_rate": 1.8171821305841925e-05, "loss": 3.3365, "step": 2650 }, { "epoch": 0.74, "learning_rate": 1.8515463917525775e-05, "loss": 3.3351, "step": 2700 }, { "epoch": 0.76, "learning_rate": 1.8859106529209624e-05, "loss": 3.3371, "step": 2750 }, { "epoch": 0.77, "learning_rate": 1.9202749140893474e-05, "loss": 3.335, "step": 2800 }, { "epoch": 0.78, "learning_rate": 1.954639175257732e-05, "loss": 3.4417, "step": 2850 }, { "epoch": 0.8, "learning_rate": 1.989003436426117e-05, "loss": 3.3259, "step": 2900 }, { "epoch": 0.81, "learning_rate": 1.9974031925456354e-05, "loss": 3.3261, "step": 2950 }, { "epoch": 0.82, "learning_rate": 1.9935843580539222e-05, "loss": 3.3521, "step": 3000 }, { "epoch": 0.84, "learning_rate": 1.989765523562209e-05, "loss": 3.33, "step": 3050 }, { "epoch": 0.85, "learning_rate": 1.985946689070496e-05, "loss": 3.318, "step": 3100 }, { "epoch": 0.87, "learning_rate": 1.9821278545787827e-05, "loss": 3.3121, "step": 3150 }, { "epoch": 0.88, "learning_rate": 1.9783090200870695e-05, "loss": 3.3127, "step": 3200 }, { "epoch": 0.89, "learning_rate": 1.9744901855953564e-05, "loss": 3.3736, "step": 3250 }, { "epoch": 0.91, "learning_rate": 1.9706713511036432e-05, "loss": 3.3047, "step": 3300 }, { "epoch": 0.92, "learning_rate": 1.96685251661193e-05, "loss": 3.2964, "step": 3350 }, { "epoch": 0.93, "learning_rate": 1.9630336821202172e-05, "loss": 3.2949, "step": 3400 }, { "epoch": 0.95, "learning_rate": 1.959214847628504e-05, "loss": 3.2819, "step": 3450 }, { "epoch": 0.96, "learning_rate": 1.955396013136791e-05, "loss": 3.2275, "step": 3500 }, { "epoch": 0.98, "learning_rate": 1.9515771786450777e-05, "loss": 3.1519, "step": 3550 }, { "epoch": 0.99, "learning_rate": 1.9477583441533645e-05, "loss": 3.0939, "step": 3600 }, { "epoch": 1.0, "learning_rate": 1.9439395096616514e-05, "loss": 3.0504, "step": 3650 }, { "epoch": 1.02, "learning_rate": 1.9401206751699382e-05, "loss": 3.0155, "step": 3700 }, { "epoch": 1.03, "learning_rate": 1.9363018406782254e-05, "loss": 3.0241, "step": 3750 }, { "epoch": 1.04, "learning_rate": 1.9324830061865122e-05, "loss": 2.9585, "step": 3800 }, { "epoch": 1.06, "learning_rate": 1.928664171694799e-05, "loss": 3.0246, "step": 3850 }, { "epoch": 1.07, "learning_rate": 1.924845337203086e-05, "loss": 2.9085, "step": 3900 }, { "epoch": 1.09, "learning_rate": 1.9210265027113727e-05, "loss": 2.8855, "step": 3950 }, { "epoch": 1.1, "learning_rate": 1.9172076682196595e-05, "loss": 2.8794, "step": 4000 }, { "epoch": 1.11, "learning_rate": 1.9133888337279464e-05, "loss": 2.8453, "step": 4050 }, { "epoch": 1.13, "learning_rate": 1.9095699992362332e-05, "loss": 2.8964, "step": 4100 }, { "epoch": 1.14, "learning_rate": 1.90575116474452e-05, "loss": 2.802, "step": 4150 }, { "epoch": 1.15, "learning_rate": 1.901932330252807e-05, "loss": 2.7739, "step": 4200 }, { "epoch": 1.17, "learning_rate": 1.8981134957610937e-05, "loss": 2.7512, "step": 4250 }, { "epoch": 1.18, "learning_rate": 1.8942946612693805e-05, "loss": 2.7022, "step": 4300 }, { "epoch": 1.2, "learning_rate": 1.8904758267776673e-05, "loss": 2.6416, "step": 4350 }, { "epoch": 1.21, "learning_rate": 1.8866569922859545e-05, "loss": 2.5816, "step": 4400 }, { "epoch": 1.22, "learning_rate": 1.8828381577942414e-05, "loss": 2.5312, "step": 4450 }, { "epoch": 1.24, "learning_rate": 1.8790193233025282e-05, "loss": 2.4745, "step": 4500 }, { "epoch": 1.25, "learning_rate": 1.875200488810815e-05, "loss": 2.4173, "step": 4550 }, { "epoch": 1.26, "learning_rate": 1.871381654319102e-05, "loss": 2.3745, "step": 4600 }, { "epoch": 1.28, "learning_rate": 1.8675628198273887e-05, "loss": 2.3327, "step": 4650 }, { "epoch": 1.29, "learning_rate": 1.863743985335676e-05, "loss": 2.2474, "step": 4700 }, { "epoch": 1.31, "learning_rate": 1.8599251508439627e-05, "loss": 2.2341, "step": 4750 }, { "epoch": 1.32, "learning_rate": 1.8561063163522495e-05, "loss": 2.2965, "step": 4800 }, { "epoch": 1.33, "learning_rate": 1.8522874818605363e-05, "loss": 2.2222, "step": 4850 }, { "epoch": 1.35, "learning_rate": 1.8484686473688232e-05, "loss": 2.1754, "step": 4900 }, { "epoch": 1.36, "learning_rate": 1.84464981287711e-05, "loss": 2.07, "step": 4950 }, { "epoch": 1.37, "learning_rate": 1.8408309783853972e-05, "loss": 2.0051, "step": 5000 }, { "epoch": 1.37, "eval_loss": 1.7739078998565674, "eval_runtime": 5.9621, "eval_samples_per_second": 238.172, "eval_steps_per_second": 14.928, "eval_wer": 0.9581555777434874, "step": 5000 }, { "epoch": 1.39, "learning_rate": 1.837012143893684e-05, "loss": 1.9864, "step": 5050 }, { "epoch": 1.4, "learning_rate": 1.833193309401971e-05, "loss": 1.9581, "step": 5100 }, { "epoch": 1.42, "learning_rate": 1.8293744749102577e-05, "loss": 1.9008, "step": 5150 }, { "epoch": 1.43, "learning_rate": 1.8255556404185445e-05, "loss": 1.8577, "step": 5200 }, { "epoch": 1.44, "learning_rate": 1.8217368059268313e-05, "loss": 1.8355, "step": 5250 }, { "epoch": 1.46, "learning_rate": 1.8179179714351182e-05, "loss": 1.7973, "step": 5300 }, { "epoch": 1.47, "learning_rate": 1.814099136943405e-05, "loss": 1.7621, "step": 5350 }, { "epoch": 1.48, "learning_rate": 1.810280302451692e-05, "loss": 1.7396, "step": 5400 }, { "epoch": 1.5, "learning_rate": 1.8064614679599787e-05, "loss": 1.6799, "step": 5450 }, { "epoch": 1.51, "learning_rate": 1.8026426334682655e-05, "loss": 1.892, "step": 5500 }, { "epoch": 1.53, "learning_rate": 1.7988237989765523e-05, "loss": 1.6804, "step": 5550 }, { "epoch": 1.54, "learning_rate": 1.7950049644848392e-05, "loss": 1.6692, "step": 5600 }, { "epoch": 1.55, "learning_rate": 1.7911861299931263e-05, "loss": 1.5962, "step": 5650 }, { "epoch": 1.57, "learning_rate": 1.7873672955014132e-05, "loss": 1.5518, "step": 5700 }, { "epoch": 1.58, "learning_rate": 1.7835484610097e-05, "loss": 1.5826, "step": 5750 }, { "epoch": 1.59, "learning_rate": 1.779729626517987e-05, "loss": 1.5265, "step": 5800 }, { "epoch": 1.61, "learning_rate": 1.7759107920262737e-05, "loss": 1.6275, "step": 5850 }, { "epoch": 1.62, "learning_rate": 1.7720919575345605e-05, "loss": 1.6163, "step": 5900 }, { "epoch": 1.64, "learning_rate": 1.7682731230428473e-05, "loss": 1.4895, "step": 5950 }, { "epoch": 1.65, "learning_rate": 1.7644542885511345e-05, "loss": 1.499, "step": 6000 }, { "epoch": 1.66, "learning_rate": 1.7606354540594213e-05, "loss": 1.4724, "step": 6050 }, { "epoch": 1.68, "learning_rate": 1.7568166195677082e-05, "loss": 1.5417, "step": 6100 }, { "epoch": 1.69, "learning_rate": 1.752997785075995e-05, "loss": 1.4544, "step": 6150 }, { "epoch": 1.7, "learning_rate": 1.749178950584282e-05, "loss": 1.4092, "step": 6200 }, { "epoch": 1.72, "learning_rate": 1.7453601160925687e-05, "loss": 1.4378, "step": 6250 }, { "epoch": 1.73, "learning_rate": 1.7415412816008555e-05, "loss": 1.4193, "step": 6300 }, { "epoch": 1.75, "learning_rate": 1.7377224471091423e-05, "loss": 1.3293, "step": 6350 }, { "epoch": 1.76, "learning_rate": 1.7339036126174292e-05, "loss": 1.3812, "step": 6400 }, { "epoch": 1.77, "learning_rate": 1.7300847781257163e-05, "loss": 1.3594, "step": 6450 }, { "epoch": 1.79, "learning_rate": 1.7262659436340032e-05, "loss": 1.3096, "step": 6500 }, { "epoch": 1.8, "learning_rate": 1.72244710914229e-05, "loss": 1.4549, "step": 6550 }, { "epoch": 1.81, "learning_rate": 1.718628274650577e-05, "loss": 1.3352, "step": 6600 }, { "epoch": 1.83, "learning_rate": 1.7148094401588637e-05, "loss": 1.3192, "step": 6650 }, { "epoch": 1.84, "learning_rate": 1.7109906056671505e-05, "loss": 1.467, "step": 6700 }, { "epoch": 1.86, "learning_rate": 1.7071717711754373e-05, "loss": 1.3192, "step": 6750 }, { "epoch": 1.87, "learning_rate": 1.7033529366837242e-05, "loss": 1.3005, "step": 6800 }, { "epoch": 1.88, "learning_rate": 1.699534102192011e-05, "loss": 1.4218, "step": 6850 }, { "epoch": 1.9, "learning_rate": 1.695715267700298e-05, "loss": 1.2466, "step": 6900 }, { "epoch": 1.91, "learning_rate": 1.691896433208585e-05, "loss": 1.268, "step": 6950 }, { "epoch": 1.92, "learning_rate": 1.688077598716872e-05, "loss": 1.253, "step": 7000 }, { "epoch": 1.94, "learning_rate": 1.6842587642251587e-05, "loss": 1.2688, "step": 7050 }, { "epoch": 1.95, "learning_rate": 1.6804399297334455e-05, "loss": 1.2382, "step": 7100 }, { "epoch": 1.97, "learning_rate": 1.6766210952417323e-05, "loss": 1.2052, "step": 7150 }, { "epoch": 1.98, "learning_rate": 1.6728022607500192e-05, "loss": 1.1967, "step": 7200 }, { "epoch": 1.99, "learning_rate": 1.668983426258306e-05, "loss": 1.2206, "step": 7250 }, { "epoch": 2.01, "learning_rate": 1.6652409684564274e-05, "loss": 1.3895, "step": 7300 }, { "epoch": 2.02, "learning_rate": 1.6614221339647142e-05, "loss": 1.19, "step": 7350 }, { "epoch": 2.03, "learning_rate": 1.657603299473001e-05, "loss": 1.1813, "step": 7400 }, { "epoch": 2.05, "learning_rate": 1.653784464981288e-05, "loss": 1.188, "step": 7450 }, { "epoch": 2.06, "learning_rate": 1.6499656304895747e-05, "loss": 1.159, "step": 7500 }, { "epoch": 2.06, "eval_loss": 0.8389284014701843, "eval_runtime": 6.9684, "eval_samples_per_second": 203.776, "eval_steps_per_second": 12.772, "eval_wer": 0.6071525823726968, "step": 7500 }, { "epoch": 2.08, "learning_rate": 1.6461467959978615e-05, "loss": 1.2819, "step": 7550 }, { "epoch": 2.09, "learning_rate": 1.6423279615061484e-05, "loss": 1.1709, "step": 7600 }, { "epoch": 2.1, "learning_rate": 1.6385091270144352e-05, "loss": 1.1531, "step": 7650 }, { "epoch": 2.12, "learning_rate": 1.634690292522722e-05, "loss": 1.1985, "step": 7700 }, { "epoch": 2.13, "learning_rate": 1.630871458031009e-05, "loss": 1.2689, "step": 7750 }, { "epoch": 2.14, "learning_rate": 1.627052623539296e-05, "loss": 1.1277, "step": 7800 }, { "epoch": 2.16, "learning_rate": 1.623233789047583e-05, "loss": 1.1333, "step": 7850 }, { "epoch": 2.17, "learning_rate": 1.6194149545558697e-05, "loss": 1.144, "step": 7900 }, { "epoch": 2.19, "learning_rate": 1.6155961200641565e-05, "loss": 1.1036, "step": 7950 }, { "epoch": 2.2, "learning_rate": 1.6117772855724434e-05, "loss": 1.1772, "step": 8000 }, { "epoch": 2.21, "learning_rate": 1.6079584510807302e-05, "loss": 1.1035, "step": 8050 }, { "epoch": 2.23, "learning_rate": 1.604139616589017e-05, "loss": 1.1222, "step": 8100 }, { "epoch": 2.24, "learning_rate": 1.6003207820973042e-05, "loss": 1.2822, "step": 8150 }, { "epoch": 2.25, "learning_rate": 1.596501947605591e-05, "loss": 1.1146, "step": 8200 }, { "epoch": 2.27, "learning_rate": 1.592683113113878e-05, "loss": 1.2445, "step": 8250 }, { "epoch": 2.28, "learning_rate": 1.588940655311999e-05, "loss": 1.4394, "step": 8300 }, { "epoch": 2.3, "learning_rate": 1.5851218208202858e-05, "loss": 1.1401, "step": 8350 }, { "epoch": 2.31, "learning_rate": 1.5813029863285726e-05, "loss": 1.1043, "step": 8400 }, { "epoch": 2.32, "learning_rate": 1.5774841518368594e-05, "loss": 1.1007, "step": 8450 }, { "epoch": 2.34, "learning_rate": 1.5736653173451463e-05, "loss": 1.1019, "step": 8500 }, { "epoch": 2.35, "learning_rate": 1.569846482853433e-05, "loss": 1.2628, "step": 8550 }, { "epoch": 2.36, "learning_rate": 1.56602764836172e-05, "loss": 1.0788, "step": 8600 }, { "epoch": 2.38, "learning_rate": 1.562208813870007e-05, "loss": 1.0853, "step": 8650 }, { "epoch": 2.39, "learning_rate": 1.558389979378294e-05, "loss": 1.2869, "step": 8700 }, { "epoch": 2.41, "learning_rate": 1.5545711448865808e-05, "loss": 1.1101, "step": 8750 }, { "epoch": 2.42, "learning_rate": 1.5507523103948676e-05, "loss": 1.1989, "step": 8800 }, { "epoch": 2.43, "learning_rate": 1.5469334759031544e-05, "loss": 1.0792, "step": 8850 }, { "epoch": 2.45, "learning_rate": 1.5431146414114413e-05, "loss": 1.049, "step": 8900 }, { "epoch": 2.46, "learning_rate": 1.539295806919728e-05, "loss": 1.0721, "step": 8950 }, { "epoch": 2.47, "learning_rate": 1.5354769724280153e-05, "loss": 1.0897, "step": 9000 }, { "epoch": 2.49, "learning_rate": 1.531658137936302e-05, "loss": 1.0837, "step": 9050 }, { "epoch": 2.5, "learning_rate": 1.527839303444589e-05, "loss": 1.0433, "step": 9100 }, { "epoch": 2.52, "learning_rate": 1.5240204689528758e-05, "loss": 1.0797, "step": 9150 }, { "epoch": 2.53, "learning_rate": 1.5202016344611626e-05, "loss": 1.0605, "step": 9200 }, { "epoch": 2.54, "learning_rate": 1.5163827999694494e-05, "loss": 1.0587, "step": 9250 }, { "epoch": 2.56, "learning_rate": 1.5125639654777364e-05, "loss": 1.0383, "step": 9300 }, { "epoch": 2.57, "learning_rate": 1.5087451309860233e-05, "loss": 1.0343, "step": 9350 }, { "epoch": 2.58, "learning_rate": 1.5049262964943101e-05, "loss": 1.0322, "step": 9400 }, { "epoch": 2.6, "learning_rate": 1.501107462002597e-05, "loss": 1.1592, "step": 9450 }, { "epoch": 2.61, "learning_rate": 1.4972886275108838e-05, "loss": 1.0268, "step": 9500 }, { "epoch": 2.63, "learning_rate": 1.4934697930191706e-05, "loss": 1.237, "step": 9550 }, { "epoch": 2.64, "learning_rate": 1.4896509585274574e-05, "loss": 1.0148, "step": 9600 }, { "epoch": 2.65, "learning_rate": 1.4858321240357446e-05, "loss": 1.0558, "step": 9650 }, { "epoch": 2.67, "learning_rate": 1.4820132895440314e-05, "loss": 1.14, "step": 9700 }, { "epoch": 2.68, "learning_rate": 1.4781944550523183e-05, "loss": 1.0092, "step": 9750 }, { "epoch": 2.69, "learning_rate": 1.474375620560605e-05, "loss": 1.0289, "step": 9800 }, { "epoch": 2.71, "learning_rate": 1.470556786068892e-05, "loss": 1.0088, "step": 9850 }, { "epoch": 2.72, "learning_rate": 1.4667379515771787e-05, "loss": 1.0221, "step": 9900 }, { "epoch": 2.74, "learning_rate": 1.4629191170854658e-05, "loss": 1.0185, "step": 9950 }, { "epoch": 2.75, "learning_rate": 1.4591002825937526e-05, "loss": 1.0512, "step": 10000 }, { "epoch": 2.75, "eval_loss": 0.6868510842323303, "eval_runtime": 5.8578, "eval_samples_per_second": 242.411, "eval_steps_per_second": 15.193, "eval_wer": 0.4955977126259417, "step": 10000 }, { "epoch": 2.76, "learning_rate": 1.4552814481020394e-05, "loss": 1.1026, "step": 10050 }, { "epoch": 2.78, "learning_rate": 1.4514626136103262e-05, "loss": 0.9989, "step": 10100 }, { "epoch": 2.79, "learning_rate": 1.447643779118613e-05, "loss": 1.0077, "step": 10150 }, { "epoch": 2.8, "learning_rate": 1.4438249446268999e-05, "loss": 1.0371, "step": 10200 }, { "epoch": 2.82, "learning_rate": 1.4400061101351867e-05, "loss": 0.9885, "step": 10250 }, { "epoch": 2.83, "learning_rate": 1.4361872756434737e-05, "loss": 1.0181, "step": 10300 }, { "epoch": 2.85, "learning_rate": 1.4323684411517606e-05, "loss": 1.0209, "step": 10350 }, { "epoch": 2.86, "learning_rate": 1.4285496066600474e-05, "loss": 1.0023, "step": 10400 }, { "epoch": 2.87, "learning_rate": 1.4247307721683344e-05, "loss": 0.9934, "step": 10450 }, { "epoch": 2.89, "learning_rate": 1.4209119376766212e-05, "loss": 0.994, "step": 10500 }, { "epoch": 2.9, "learning_rate": 1.417093103184908e-05, "loss": 0.9673, "step": 10550 }, { "epoch": 2.91, "learning_rate": 1.413274268693195e-05, "loss": 0.9867, "step": 10600 }, { "epoch": 2.93, "learning_rate": 1.4094554342014819e-05, "loss": 0.9823, "step": 10650 }, { "epoch": 2.94, "learning_rate": 1.4056365997097687e-05, "loss": 1.0849, "step": 10700 }, { "epoch": 2.96, "learning_rate": 1.4018177652180556e-05, "loss": 0.98, "step": 10750 }, { "epoch": 2.97, "learning_rate": 1.3979989307263424e-05, "loss": 0.968, "step": 10800 }, { "epoch": 2.98, "learning_rate": 1.3941800962346292e-05, "loss": 0.9708, "step": 10850 }, { "epoch": 3.0, "learning_rate": 1.390361261742916e-05, "loss": 0.9496, "step": 10900 }, { "epoch": 3.01, "learning_rate": 1.386542427251203e-05, "loss": 1.0045, "step": 10950 }, { "epoch": 3.02, "learning_rate": 1.3827235927594899e-05, "loss": 0.9763, "step": 11000 }, { "epoch": 3.04, "learning_rate": 1.3789047582677767e-05, "loss": 0.9587, "step": 11050 }, { "epoch": 3.05, "learning_rate": 1.3750859237760636e-05, "loss": 0.9634, "step": 11100 }, { "epoch": 3.07, "learning_rate": 1.3712670892843504e-05, "loss": 1.0887, "step": 11150 }, { "epoch": 3.08, "learning_rate": 1.3674482547926372e-05, "loss": 0.9545, "step": 11200 }, { "epoch": 3.09, "learning_rate": 1.3636294203009244e-05, "loss": 1.0623, "step": 11250 }, { "epoch": 3.11, "learning_rate": 1.3598105858092112e-05, "loss": 0.9427, "step": 11300 }, { "epoch": 3.12, "learning_rate": 1.355991751317498e-05, "loss": 0.9258, "step": 11350 }, { "epoch": 3.13, "learning_rate": 1.3521729168257849e-05, "loss": 0.9696, "step": 11400 }, { "epoch": 3.15, "learning_rate": 1.3483540823340717e-05, "loss": 0.9528, "step": 11450 }, { "epoch": 3.16, "learning_rate": 1.3445352478423586e-05, "loss": 1.1304, "step": 11500 }, { "epoch": 3.18, "learning_rate": 1.3407164133506454e-05, "loss": 1.1216, "step": 11550 }, { "epoch": 3.19, "learning_rate": 1.3368975788589324e-05, "loss": 0.9327, "step": 11600 }, { "epoch": 3.2, "learning_rate": 1.3330787443672192e-05, "loss": 1.173, "step": 11650 }, { "epoch": 3.22, "learning_rate": 1.329259909875506e-05, "loss": 0.9033, "step": 11700 }, { "epoch": 3.23, "learning_rate": 1.3254410753837929e-05, "loss": 0.943, "step": 11750 }, { "epoch": 3.24, "learning_rate": 1.3216222408920797e-05, "loss": 1.0518, "step": 11800 }, { "epoch": 3.26, "learning_rate": 1.3178034064003666e-05, "loss": 0.9425, "step": 11850 }, { "epoch": 3.27, "learning_rate": 1.3139845719086537e-05, "loss": 0.9491, "step": 11900 }, { "epoch": 3.29, "learning_rate": 1.3101657374169406e-05, "loss": 0.9494, "step": 11950 }, { "epoch": 3.3, "learning_rate": 1.3063469029252274e-05, "loss": 0.9392, "step": 12000 }, { "epoch": 3.31, "learning_rate": 1.3025280684335142e-05, "loss": 0.9223, "step": 12050 }, { "epoch": 3.33, "learning_rate": 1.298709233941801e-05, "loss": 0.9135, "step": 12100 }, { "epoch": 3.34, "learning_rate": 1.2948903994500879e-05, "loss": 0.9082, "step": 12150 }, { "epoch": 3.35, "learning_rate": 1.2910715649583747e-05, "loss": 0.9618, "step": 12200 }, { "epoch": 3.37, "learning_rate": 1.2872527304666617e-05, "loss": 0.9322, "step": 12250 }, { "epoch": 3.38, "learning_rate": 1.2834338959749486e-05, "loss": 0.9062, "step": 12300 }, { "epoch": 3.4, "learning_rate": 1.2796150614832354e-05, "loss": 0.9721, "step": 12350 }, { "epoch": 3.41, "learning_rate": 1.2757962269915222e-05, "loss": 0.9503, "step": 12400 }, { "epoch": 3.42, "learning_rate": 1.2720537691896434e-05, "loss": 1.0868, "step": 12450 }, { "epoch": 3.44, "learning_rate": 1.2682349346979303e-05, "loss": 0.924, "step": 12500 }, { "epoch": 3.44, "eval_loss": 0.6139983534812927, "eval_runtime": 7.6281, "eval_samples_per_second": 186.153, "eval_steps_per_second": 11.667, "eval_wer": 0.44277026413724246, "step": 12500 }, { "epoch": 3.45, "learning_rate": 1.2644161002062171e-05, "loss": 0.9344, "step": 12550 }, { "epoch": 3.46, "learning_rate": 1.260597265714504e-05, "loss": 0.915, "step": 12600 }, { "epoch": 3.48, "learning_rate": 1.2567784312227908e-05, "loss": 1.0394, "step": 12650 }, { "epoch": 3.49, "learning_rate": 1.2529595967310778e-05, "loss": 0.9323, "step": 12700 }, { "epoch": 3.51, "learning_rate": 1.2491407622393648e-05, "loss": 0.9316, "step": 12750 }, { "epoch": 3.52, "learning_rate": 1.2453219277476516e-05, "loss": 0.8851, "step": 12800 }, { "epoch": 3.53, "learning_rate": 1.2415030932559384e-05, "loss": 0.963, "step": 12850 }, { "epoch": 3.55, "learning_rate": 1.2376842587642253e-05, "loss": 0.9073, "step": 12900 }, { "epoch": 3.56, "learning_rate": 1.2338654242725121e-05, "loss": 0.895, "step": 12950 }, { "epoch": 3.57, "learning_rate": 1.230046589780799e-05, "loss": 0.9309, "step": 13000 }, { "epoch": 3.59, "learning_rate": 1.2262277552890858e-05, "loss": 0.912, "step": 13050 }, { "epoch": 3.6, "learning_rate": 1.2224089207973728e-05, "loss": 0.9566, "step": 13100 }, { "epoch": 3.62, "learning_rate": 1.2185900863056596e-05, "loss": 1.1307, "step": 13150 }, { "epoch": 3.63, "learning_rate": 1.2147712518139464e-05, "loss": 0.9033, "step": 13200 }, { "epoch": 3.64, "learning_rate": 1.2109524173222333e-05, "loss": 0.9376, "step": 13250 }, { "epoch": 3.66, "learning_rate": 1.2071335828305201e-05, "loss": 0.9115, "step": 13300 }, { "epoch": 3.67, "learning_rate": 1.203314748338807e-05, "loss": 0.9085, "step": 13350 }, { "epoch": 3.68, "learning_rate": 1.1994959138470941e-05, "loss": 0.9547, "step": 13400 }, { "epoch": 3.7, "learning_rate": 1.195677079355381e-05, "loss": 0.9157, "step": 13450 }, { "epoch": 3.71, "learning_rate": 1.1918582448636678e-05, "loss": 1.0078, "step": 13500 }, { "epoch": 3.73, "learning_rate": 1.1880394103719546e-05, "loss": 0.8926, "step": 13550 }, { "epoch": 3.74, "learning_rate": 1.1842205758802414e-05, "loss": 1.0213, "step": 13600 }, { "epoch": 3.75, "learning_rate": 1.1804017413885283e-05, "loss": 0.9104, "step": 13650 }, { "epoch": 3.77, "learning_rate": 1.1765829068968151e-05, "loss": 0.8892, "step": 13700 }, { "epoch": 3.78, "learning_rate": 1.1727640724051021e-05, "loss": 0.9327, "step": 13750 }, { "epoch": 3.79, "learning_rate": 1.168945237913389e-05, "loss": 0.9209, "step": 13800 }, { "epoch": 3.81, "learning_rate": 1.1651264034216758e-05, "loss": 0.9761, "step": 13850 }, { "epoch": 3.82, "learning_rate": 1.1613075689299626e-05, "loss": 0.8993, "step": 13900 }, { "epoch": 3.84, "learning_rate": 1.1574887344382494e-05, "loss": 0.8803, "step": 13950 }, { "epoch": 3.85, "learning_rate": 1.1536698999465363e-05, "loss": 0.8937, "step": 14000 }, { "epoch": 3.86, "learning_rate": 1.1498510654548234e-05, "loss": 0.9081, "step": 14050 }, { "epoch": 3.88, "learning_rate": 1.1460322309631103e-05, "loss": 0.897, "step": 14100 }, { "epoch": 3.89, "learning_rate": 1.1422133964713971e-05, "loss": 0.9021, "step": 14150 }, { "epoch": 3.9, "learning_rate": 1.138394561979684e-05, "loss": 0.9063, "step": 14200 }, { "epoch": 3.92, "learning_rate": 1.1345757274879708e-05, "loss": 0.9215, "step": 14250 }, { "epoch": 3.93, "learning_rate": 1.1307568929962576e-05, "loss": 1.1618, "step": 14300 }, { "epoch": 3.95, "learning_rate": 1.1269380585045444e-05, "loss": 0.8714, "step": 14350 }, { "epoch": 3.96, "learning_rate": 1.1231192240128314e-05, "loss": 0.9932, "step": 14400 }, { "epoch": 3.97, "learning_rate": 1.1193003895211183e-05, "loss": 0.8614, "step": 14450 }, { "epoch": 3.99, "learning_rate": 1.1154815550294051e-05, "loss": 0.9436, "step": 14500 }, { "epoch": 4.0, "learning_rate": 1.111662720537692e-05, "loss": 0.9137, "step": 14550 }, { "epoch": 4.01, "learning_rate": 1.1078438860459788e-05, "loss": 0.8823, "step": 14600 }, { "epoch": 4.03, "learning_rate": 1.1040250515542656e-05, "loss": 0.8566, "step": 14650 }, { "epoch": 4.04, "learning_rate": 1.1002062170625528e-05, "loss": 0.8932, "step": 14700 }, { "epoch": 4.06, "learning_rate": 1.0963873825708396e-05, "loss": 0.897, "step": 14750 }, { "epoch": 4.07, "learning_rate": 1.0925685480791264e-05, "loss": 0.8812, "step": 14800 }, { "epoch": 4.08, "learning_rate": 1.0887497135874133e-05, "loss": 0.8574, "step": 14850 }, { "epoch": 4.1, "learning_rate": 1.0850072557855345e-05, "loss": 1.0636, "step": 14900 }, { "epoch": 4.11, "learning_rate": 1.0811884212938213e-05, "loss": 0.8861, "step": 14950 }, { "epoch": 4.12, "learning_rate": 1.0773695868021082e-05, "loss": 0.8536, "step": 15000 }, { "epoch": 4.12, "eval_loss": 0.5816845297813416, "eval_runtime": 8.1181, "eval_samples_per_second": 174.918, "eval_steps_per_second": 10.963, "eval_wer": 0.4120904057365889, "step": 15000 }, { "epoch": 4.14, "learning_rate": 1.073550752310395e-05, "loss": 0.872, "step": 15050 }, { "epoch": 4.15, "learning_rate": 1.0697319178186818e-05, "loss": 0.8631, "step": 15100 }, { "epoch": 4.17, "learning_rate": 1.0659130833269686e-05, "loss": 0.8651, "step": 15150 }, { "epoch": 4.18, "learning_rate": 1.0620942488352555e-05, "loss": 0.8493, "step": 15200 }, { "epoch": 4.19, "learning_rate": 1.0582754143435425e-05, "loss": 0.8854, "step": 15250 }, { "epoch": 4.21, "learning_rate": 1.0544565798518293e-05, "loss": 0.8702, "step": 15300 }, { "epoch": 4.22, "learning_rate": 1.0506377453601161e-05, "loss": 0.8751, "step": 15350 }, { "epoch": 4.23, "learning_rate": 1.046818910868403e-05, "loss": 0.8496, "step": 15400 }, { "epoch": 4.25, "learning_rate": 1.0430000763766898e-05, "loss": 0.893, "step": 15450 }, { "epoch": 4.26, "learning_rate": 1.0391812418849766e-05, "loss": 1.0069, "step": 15500 }, { "epoch": 4.28, "learning_rate": 1.0353624073932638e-05, "loss": 0.8629, "step": 15550 }, { "epoch": 4.29, "learning_rate": 1.0315435729015506e-05, "loss": 0.9775, "step": 15600 }, { "epoch": 4.3, "learning_rate": 1.0277247384098375e-05, "loss": 0.8736, "step": 15650 }, { "epoch": 4.32, "learning_rate": 1.0239059039181243e-05, "loss": 0.8875, "step": 15700 }, { "epoch": 4.33, "learning_rate": 1.0201634461162455e-05, "loss": 1.0019, "step": 15750 }, { "epoch": 4.34, "learning_rate": 1.0163446116245324e-05, "loss": 0.8482, "step": 15800 }, { "epoch": 4.36, "learning_rate": 1.0125257771328192e-05, "loss": 0.8552, "step": 15850 }, { "epoch": 4.37, "learning_rate": 1.008706942641106e-05, "loss": 0.9918, "step": 15900 }, { "epoch": 4.39, "learning_rate": 1.0048881081493929e-05, "loss": 0.877, "step": 15950 }, { "epoch": 4.4, "learning_rate": 1.0010692736576797e-05, "loss": 0.8536, "step": 16000 }, { "epoch": 4.41, "learning_rate": 9.972504391659667e-06, "loss": 0.8501, "step": 16050 }, { "epoch": 4.43, "learning_rate": 9.934316046742535e-06, "loss": 0.8529, "step": 16100 }, { "epoch": 4.44, "learning_rate": 9.896127701825404e-06, "loss": 0.8721, "step": 16150 }, { "epoch": 4.45, "learning_rate": 9.857939356908272e-06, "loss": 0.8624, "step": 16200 }, { "epoch": 4.47, "learning_rate": 9.81975101199114e-06, "loss": 0.8553, "step": 16250 }, { "epoch": 4.48, "learning_rate": 9.781562667074009e-06, "loss": 0.8438, "step": 16300 }, { "epoch": 4.5, "learning_rate": 9.743374322156879e-06, "loss": 1.0249, "step": 16350 }, { "epoch": 4.51, "learning_rate": 9.70594974413809e-06, "loss": 0.971, "step": 16400 }, { "epoch": 4.52, "learning_rate": 9.667761399220959e-06, "loss": 0.9721, "step": 16450 }, { "epoch": 4.54, "learning_rate": 9.629573054303827e-06, "loss": 0.8466, "step": 16500 }, { "epoch": 4.55, "learning_rate": 9.591384709386696e-06, "loss": 0.8697, "step": 16550 }, { "epoch": 4.56, "learning_rate": 9.553196364469564e-06, "loss": 0.851, "step": 16600 }, { "epoch": 4.58, "learning_rate": 9.515008019552432e-06, "loss": 0.8847, "step": 16650 }, { "epoch": 4.59, "learning_rate": 9.476819674635302e-06, "loss": 0.9977, "step": 16700 }, { "epoch": 4.61, "learning_rate": 9.43863132971817e-06, "loss": 0.8584, "step": 16750 }, { "epoch": 4.62, "learning_rate": 9.400442984801039e-06, "loss": 0.8572, "step": 16800 }, { "epoch": 4.63, "learning_rate": 9.362254639883909e-06, "loss": 0.8482, "step": 16850 }, { "epoch": 4.65, "learning_rate": 9.324066294966777e-06, "loss": 0.8498, "step": 16900 }, { "epoch": 4.66, "learning_rate": 9.285877950049646e-06, "loss": 0.8706, "step": 16950 }, { "epoch": 4.67, "learning_rate": 9.247689605132514e-06, "loss": 0.8794, "step": 17000 }, { "epoch": 4.69, "learning_rate": 9.209501260215384e-06, "loss": 0.8557, "step": 17050 }, { "epoch": 4.7, "learning_rate": 9.171312915298252e-06, "loss": 0.9481, "step": 17100 }, { "epoch": 4.72, "learning_rate": 9.13312457038112e-06, "loss": 0.8453, "step": 17150 }, { "epoch": 4.73, "learning_rate": 9.094936225463989e-06, "loss": 0.8666, "step": 17200 }, { "epoch": 4.74, "learning_rate": 9.056747880546857e-06, "loss": 0.8673, "step": 17250 }, { "epoch": 4.76, "learning_rate": 9.018559535629726e-06, "loss": 0.953, "step": 17300 }, { "epoch": 4.77, "learning_rate": 8.980371190712596e-06, "loss": 0.9799, "step": 17350 }, { "epoch": 4.78, "learning_rate": 8.942182845795464e-06, "loss": 0.8246, "step": 17400 }, { "epoch": 4.8, "learning_rate": 8.903994500878332e-06, "loss": 0.8363, "step": 17450 }, { "epoch": 4.81, "learning_rate": 8.865806155961202e-06, "loss": 0.8607, "step": 17500 }, { "epoch": 4.81, "eval_loss": 0.5505547523498535, "eval_runtime": 14.271, "eval_samples_per_second": 99.502, "eval_steps_per_second": 6.236, "eval_wer": 0.38957974040119814, "step": 17500 }, { "epoch": 4.83, "learning_rate": 8.82761781104407e-06, "loss": 0.8093, "step": 17550 }, { "epoch": 4.84, "learning_rate": 8.789429466126939e-06, "loss": 0.9191, "step": 17600 }, { "epoch": 4.85, "learning_rate": 8.751241121209807e-06, "loss": 0.8336, "step": 17650 }, { "epoch": 4.87, "learning_rate": 8.713052776292677e-06, "loss": 0.8377, "step": 17700 }, { "epoch": 4.88, "learning_rate": 8.674864431375546e-06, "loss": 0.8474, "step": 17750 }, { "epoch": 4.89, "learning_rate": 8.636676086458414e-06, "loss": 0.8395, "step": 17800 }, { "epoch": 4.91, "learning_rate": 8.598487741541282e-06, "loss": 0.8466, "step": 17850 }, { "epoch": 4.92, "learning_rate": 8.56029939662415e-06, "loss": 0.8127, "step": 17900 }, { "epoch": 4.94, "learning_rate": 8.522111051707019e-06, "loss": 0.8508, "step": 17950 }, { "epoch": 4.95, "learning_rate": 8.483922706789889e-06, "loss": 0.8101, "step": 18000 }, { "epoch": 4.96, "learning_rate": 8.445734361872757e-06, "loss": 0.84, "step": 18050 }, { "epoch": 4.98, "learning_rate": 8.407546016955626e-06, "loss": 0.8596, "step": 18100 }, { "epoch": 4.99, "learning_rate": 8.369357672038496e-06, "loss": 1.148, "step": 18150 }, { "epoch": 5.0, "learning_rate": 8.331169327121364e-06, "loss": 0.8248, "step": 18200 }, { "epoch": 5.02, "learning_rate": 8.292980982204232e-06, "loss": 0.8412, "step": 18250 }, { "epoch": 5.03, "learning_rate": 8.2547926372871e-06, "loss": 0.8536, "step": 18300 }, { "epoch": 5.05, "learning_rate": 8.216604292369969e-06, "loss": 0.8246, "step": 18350 }, { "epoch": 5.06, "learning_rate": 8.178415947452837e-06, "loss": 0.8256, "step": 18400 }, { "epoch": 5.07, "learning_rate": 8.140227602535706e-06, "loss": 0.9476, "step": 18450 }, { "epoch": 5.09, "learning_rate": 8.102039257618576e-06, "loss": 0.8296, "step": 18500 }, { "epoch": 5.1, "learning_rate": 8.063850912701444e-06, "loss": 0.8177, "step": 18550 }, { "epoch": 5.11, "learning_rate": 8.025662567784312e-06, "loss": 0.8098, "step": 18600 }, { "epoch": 5.13, "learning_rate": 7.987474222867182e-06, "loss": 0.867, "step": 18650 }, { "epoch": 5.14, "learning_rate": 7.94928587795005e-06, "loss": 0.8207, "step": 18700 }, { "epoch": 5.16, "learning_rate": 7.911097533032919e-06, "loss": 0.9099, "step": 18750 }, { "epoch": 5.17, "learning_rate": 7.872909188115789e-06, "loss": 0.8382, "step": 18800 }, { "epoch": 5.18, "learning_rate": 7.834720843198657e-06, "loss": 0.8608, "step": 18850 }, { "epoch": 5.2, "learning_rate": 7.796532498281526e-06, "loss": 1.0387, "step": 18900 }, { "epoch": 5.21, "learning_rate": 7.758344153364394e-06, "loss": 0.9698, "step": 18950 }, { "epoch": 5.22, "learning_rate": 7.720919575345606e-06, "loss": 0.9612, "step": 19000 }, { "epoch": 5.24, "learning_rate": 7.682731230428474e-06, "loss": 0.8185, "step": 19050 }, { "epoch": 5.25, "learning_rate": 7.644542885511343e-06, "loss": 0.8188, "step": 19100 }, { "epoch": 5.27, "learning_rate": 7.606354540594211e-06, "loss": 0.8036, "step": 19150 }, { "epoch": 5.28, "learning_rate": 7.56816619567708e-06, "loss": 0.8257, "step": 19200 }, { "epoch": 5.29, "learning_rate": 7.5299778507599485e-06, "loss": 0.8049, "step": 19250 }, { "epoch": 5.31, "learning_rate": 7.491789505842817e-06, "loss": 0.8208, "step": 19300 }, { "epoch": 5.32, "learning_rate": 7.453601160925687e-06, "loss": 0.8078, "step": 19350 }, { "epoch": 5.33, "learning_rate": 7.415412816008555e-06, "loss": 0.8315, "step": 19400 }, { "epoch": 5.35, "learning_rate": 7.3772244710914234e-06, "loss": 0.8205, "step": 19450 }, { "epoch": 5.36, "learning_rate": 7.339036126174293e-06, "loss": 0.8152, "step": 19500 }, { "epoch": 5.38, "learning_rate": 7.300847781257161e-06, "loss": 0.8056, "step": 19550 }, { "epoch": 5.39, "learning_rate": 7.262659436340029e-06, "loss": 0.8023, "step": 19600 }, { "epoch": 5.4, "learning_rate": 7.2244710914228984e-06, "loss": 0.881, "step": 19650 }, { "epoch": 5.42, "learning_rate": 7.186282746505767e-06, "loss": 0.8111, "step": 19700 }, { "epoch": 5.43, "learning_rate": 7.148094401588636e-06, "loss": 1.0644, "step": 19750 }, { "epoch": 5.44, "learning_rate": 7.109906056671504e-06, "loss": 1.0396, "step": 19800 }, { "epoch": 5.46, "learning_rate": 7.0717177117543734e-06, "loss": 0.9269, "step": 19850 }, { "epoch": 5.47, "learning_rate": 7.033529366837242e-06, "loss": 0.8443, "step": 19900 }, { "epoch": 5.49, "learning_rate": 6.99534102192011e-06, "loss": 0.8458, "step": 19950 }, { "epoch": 5.5, "learning_rate": 6.957152677002979e-06, "loss": 0.8019, "step": 20000 }, { "epoch": 5.5, "eval_loss": 0.5279287099838257, "eval_runtime": 9.2651, "eval_samples_per_second": 153.263, "eval_steps_per_second": 9.606, "eval_wer": 0.37315058545883634, "step": 20000 }, { "epoch": 5.51, "learning_rate": 6.918964332085848e-06, "loss": 0.8175, "step": 20050 }, { "epoch": 5.53, "learning_rate": 6.880775987168716e-06, "loss": 0.7983, "step": 20100 }, { "epoch": 5.54, "learning_rate": 6.842587642251586e-06, "loss": 0.8181, "step": 20150 }, { "epoch": 5.55, "learning_rate": 6.804399297334454e-06, "loss": 0.8068, "step": 20200 }, { "epoch": 5.57, "learning_rate": 6.766210952417323e-06, "loss": 0.781, "step": 20250 }, { "epoch": 5.58, "learning_rate": 6.728022607500192e-06, "loss": 0.8165, "step": 20300 }, { "epoch": 5.6, "learning_rate": 6.68983426258306e-06, "loss": 0.7883, "step": 20350 }, { "epoch": 5.61, "learning_rate": 6.651645917665928e-06, "loss": 0.8357, "step": 20400 }, { "epoch": 5.62, "learning_rate": 6.613457572748797e-06, "loss": 0.7844, "step": 20450 }, { "epoch": 5.64, "learning_rate": 6.575269227831667e-06, "loss": 0.8352, "step": 20500 }, { "epoch": 5.65, "learning_rate": 6.537080882914535e-06, "loss": 0.8296, "step": 20550 }, { "epoch": 5.66, "learning_rate": 6.498892537997403e-06, "loss": 0.7926, "step": 20600 }, { "epoch": 5.68, "learning_rate": 6.4607041930802726e-06, "loss": 0.7944, "step": 20650 }, { "epoch": 5.69, "learning_rate": 6.422515848163141e-06, "loss": 0.8283, "step": 20700 }, { "epoch": 5.71, "learning_rate": 6.384327503246009e-06, "loss": 0.8099, "step": 20750 }, { "epoch": 5.72, "learning_rate": 6.346139158328879e-06, "loss": 0.8049, "step": 20800 }, { "epoch": 5.73, "learning_rate": 6.3079508134117476e-06, "loss": 0.9324, "step": 20850 }, { "epoch": 5.75, "learning_rate": 6.269762468494616e-06, "loss": 0.8097, "step": 20900 }, { "epoch": 5.76, "learning_rate": 6.231574123577485e-06, "loss": 0.981, "step": 20950 }, { "epoch": 5.77, "learning_rate": 6.193385778660353e-06, "loss": 0.8396, "step": 21000 }, { "epoch": 5.79, "learning_rate": 6.155197433743222e-06, "loss": 0.7962, "step": 21050 }, { "epoch": 5.8, "learning_rate": 6.11700908882609e-06, "loss": 0.8299, "step": 21100 }, { "epoch": 5.82, "learning_rate": 6.07882074390896e-06, "loss": 0.8123, "step": 21150 }, { "epoch": 5.83, "learning_rate": 6.040632398991828e-06, "loss": 0.8133, "step": 21200 }, { "epoch": 5.84, "learning_rate": 6.002444054074697e-06, "loss": 0.8085, "step": 21250 }, { "epoch": 5.86, "learning_rate": 5.964255709157566e-06, "loss": 0.7978, "step": 21300 }, { "epoch": 5.87, "learning_rate": 5.926067364240434e-06, "loss": 0.7933, "step": 21350 }, { "epoch": 5.88, "learning_rate": 5.8878790193233025e-06, "loss": 0.9455, "step": 21400 }, { "epoch": 5.9, "learning_rate": 5.849690674406172e-06, "loss": 0.8158, "step": 21450 }, { "epoch": 5.91, "learning_rate": 5.811502329489041e-06, "loss": 0.7701, "step": 21500 }, { "epoch": 5.93, "learning_rate": 5.773313984571909e-06, "loss": 0.8197, "step": 21550 }, { "epoch": 5.94, "learning_rate": 5.735125639654778e-06, "loss": 0.8038, "step": 21600 }, { "epoch": 5.95, "learning_rate": 5.696937294737647e-06, "loss": 0.8293, "step": 21650 }, { "epoch": 5.97, "learning_rate": 5.658748949820515e-06, "loss": 0.9085, "step": 21700 }, { "epoch": 5.98, "learning_rate": 5.620560604903383e-06, "loss": 0.8422, "step": 21750 }, { "epoch": 5.99, "learning_rate": 5.5823722599862525e-06, "loss": 0.8218, "step": 21800 }, { "epoch": 6.01, "learning_rate": 5.544183915069121e-06, "loss": 0.8093, "step": 21850 }, { "epoch": 6.02, "learning_rate": 5.50599557015199e-06, "loss": 0.832, "step": 21900 }, { "epoch": 6.04, "learning_rate": 5.467807225234859e-06, "loss": 0.8237, "step": 21950 }, { "epoch": 6.05, "learning_rate": 5.4296188803177275e-06, "loss": 0.8167, "step": 22000 }, { "epoch": 6.06, "learning_rate": 5.391430535400596e-06, "loss": 0.8082, "step": 22050 }, { "epoch": 6.08, "learning_rate": 5.353242190483465e-06, "loss": 0.7876, "step": 22100 }, { "epoch": 6.09, "learning_rate": 5.315053845566333e-06, "loss": 0.8083, "step": 22150 }, { "epoch": 6.1, "learning_rate": 5.276865500649202e-06, "loss": 0.7984, "step": 22200 }, { "epoch": 6.12, "learning_rate": 5.238677155732072e-06, "loss": 0.9004, "step": 22250 }, { "epoch": 6.13, "learning_rate": 5.20048881081494e-06, "loss": 0.9371, "step": 22300 }, { "epoch": 6.15, "learning_rate": 5.162300465897808e-06, "loss": 0.9698, "step": 22350 }, { "epoch": 6.16, "learning_rate": 5.124112120980677e-06, "loss": 0.9097, "step": 22400 }, { "epoch": 6.17, "learning_rate": 5.085923776063546e-06, "loss": 0.8079, "step": 22450 }, { "epoch": 6.19, "learning_rate": 5.047735431146414e-06, "loss": 0.8105, "step": 22500 }, { "epoch": 6.19, "eval_loss": 0.5264317393302917, "eval_runtime": 17.6342, "eval_samples_per_second": 80.526, "eval_steps_per_second": 5.047, "eval_wer": 0.36116910229645094, "step": 22500 }, { "epoch": 6.2, "learning_rate": 5.0095470862292825e-06, "loss": 0.8089, "step": 22550 }, { "epoch": 6.21, "learning_rate": 4.971358741312152e-06, "loss": 0.7828, "step": 22600 }, { "epoch": 6.23, "learning_rate": 4.933170396395021e-06, "loss": 0.8145, "step": 22650 }, { "epoch": 6.24, "learning_rate": 4.894982051477889e-06, "loss": 0.8245, "step": 22700 }, { "epoch": 6.26, "learning_rate": 4.8567937065607575e-06, "loss": 0.7974, "step": 22750 }, { "epoch": 6.27, "learning_rate": 4.818605361643627e-06, "loss": 1.0031, "step": 22800 }, { "epoch": 6.28, "learning_rate": 4.780417016726496e-06, "loss": 0.8235, "step": 22850 }, { "epoch": 6.3, "learning_rate": 4.742992438707707e-06, "loss": 0.9321, "step": 22900 }, { "epoch": 6.31, "learning_rate": 4.704804093790575e-06, "loss": 0.7859, "step": 22950 }, { "epoch": 6.32, "learning_rate": 4.666615748873445e-06, "loss": 0.9391, "step": 23000 }, { "epoch": 6.34, "learning_rate": 4.628427403956313e-06, "loss": 0.7873, "step": 23050 }, { "epoch": 6.35, "learning_rate": 4.590239059039181e-06, "loss": 0.7935, "step": 23100 }, { "epoch": 6.37, "learning_rate": 4.55205071412205e-06, "loss": 0.7951, "step": 23150 }, { "epoch": 6.38, "learning_rate": 4.513862369204919e-06, "loss": 0.9234, "step": 23200 }, { "epoch": 6.39, "learning_rate": 4.475674024287788e-06, "loss": 0.793, "step": 23250 }, { "epoch": 6.41, "learning_rate": 4.437485679370657e-06, "loss": 0.8021, "step": 23300 }, { "epoch": 6.42, "learning_rate": 4.399297334453525e-06, "loss": 0.8176, "step": 23350 }, { "epoch": 6.43, "learning_rate": 4.361108989536394e-06, "loss": 0.8142, "step": 23400 }, { "epoch": 6.45, "learning_rate": 4.322920644619262e-06, "loss": 0.8223, "step": 23450 }, { "epoch": 6.46, "learning_rate": 4.284732299702131e-06, "loss": 0.7836, "step": 23500 }, { "epoch": 6.48, "learning_rate": 4.246543954785e-06, "loss": 0.8061, "step": 23550 }, { "epoch": 6.49, "learning_rate": 4.208355609867869e-06, "loss": 0.7834, "step": 23600 }, { "epoch": 6.5, "learning_rate": 4.170167264950737e-06, "loss": 0.95, "step": 23650 }, { "epoch": 6.52, "learning_rate": 4.131978920033606e-06, "loss": 0.8053, "step": 23700 }, { "epoch": 6.53, "learning_rate": 4.0937905751164745e-06, "loss": 0.863, "step": 23750 }, { "epoch": 6.54, "learning_rate": 4.055602230199344e-06, "loss": 0.787, "step": 23800 }, { "epoch": 6.56, "learning_rate": 4.017413885282212e-06, "loss": 0.7837, "step": 23850 }, { "epoch": 6.57, "learning_rate": 3.979225540365081e-06, "loss": 0.799, "step": 23900 }, { "epoch": 6.59, "learning_rate": 3.9410371954479495e-06, "loss": 0.7908, "step": 23950 }, { "epoch": 6.6, "learning_rate": 3.902848850530818e-06, "loss": 0.7963, "step": 24000 }, { "epoch": 6.61, "learning_rate": 3.864660505613687e-06, "loss": 0.8737, "step": 24050 }, { "epoch": 6.63, "learning_rate": 3.826472160696555e-06, "loss": 0.803, "step": 24100 }, { "epoch": 6.64, "learning_rate": 3.7882838157794245e-06, "loss": 0.7812, "step": 24150 }, { "epoch": 6.65, "learning_rate": 3.7500954708622933e-06, "loss": 0.7602, "step": 24200 }, { "epoch": 6.67, "learning_rate": 3.7119071259451616e-06, "loss": 0.8042, "step": 24250 }, { "epoch": 6.68, "learning_rate": 3.6737187810280308e-06, "loss": 0.7978, "step": 24300 }, { "epoch": 6.7, "learning_rate": 3.6355304361108995e-06, "loss": 0.834, "step": 24350 }, { "epoch": 6.71, "learning_rate": 3.597342091193768e-06, "loss": 0.8157, "step": 24400 }, { "epoch": 6.72, "learning_rate": 3.5591537462766366e-06, "loss": 0.8266, "step": 24450 }, { "epoch": 6.74, "learning_rate": 3.5209654013595053e-06, "loss": 0.7912, "step": 24500 }, { "epoch": 6.75, "learning_rate": 3.482777056442374e-06, "loss": 0.7912, "step": 24550 }, { "epoch": 6.76, "learning_rate": 3.444588711525243e-06, "loss": 0.7894, "step": 24600 }, { "epoch": 6.78, "learning_rate": 3.406400366608111e-06, "loss": 0.7793, "step": 24650 }, { "epoch": 6.79, "learning_rate": 3.3682120216909803e-06, "loss": 0.7824, "step": 24700 }, { "epoch": 6.81, "learning_rate": 3.3300236767738487e-06, "loss": 0.7955, "step": 24750 }, { "epoch": 6.82, "learning_rate": 3.2918353318567174e-06, "loss": 0.7815, "step": 24800 }, { "epoch": 6.83, "learning_rate": 3.2536469869395866e-06, "loss": 0.8559, "step": 24850 }, { "epoch": 6.85, "learning_rate": 3.215458642022455e-06, "loss": 0.9687, "step": 24900 }, { "epoch": 6.86, "learning_rate": 3.1772702971053237e-06, "loss": 0.7589, "step": 24950 }, { "epoch": 6.87, "learning_rate": 3.139081952188193e-06, "loss": 0.881, "step": 25000 }, { "epoch": 6.87, "eval_loss": 0.5102140307426453, "eval_runtime": 14.613, "eval_samples_per_second": 97.173, "eval_steps_per_second": 6.09, "eval_wer": 0.3605337206135972, "step": 25000 }, { "epoch": 6.89, "learning_rate": 3.100893607271061e-06, "loss": 0.7757, "step": 25050 }, { "epoch": 6.9, "learning_rate": 3.06270526235393e-06, "loss": 0.8149, "step": 25100 }, { "epoch": 6.92, "learning_rate": 3.0245169174367982e-06, "loss": 0.9439, "step": 25150 }, { "epoch": 6.93, "learning_rate": 2.9863285725196674e-06, "loss": 0.7954, "step": 25200 }, { "epoch": 6.94, "learning_rate": 2.948140227602536e-06, "loss": 0.8041, "step": 25250 }, { "epoch": 6.96, "learning_rate": 2.9099518826854045e-06, "loss": 0.8766, "step": 25300 }, { "epoch": 6.97, "learning_rate": 2.8717635377682732e-06, "loss": 0.809, "step": 25350 }, { "epoch": 6.98, "learning_rate": 2.833575192851142e-06, "loss": 0.8029, "step": 25400 }, { "epoch": 7.0, "learning_rate": 2.7953868479340107e-06, "loss": 0.7913, "step": 25450 }, { "epoch": 7.01, "learning_rate": 2.7571985030168795e-06, "loss": 0.7911, "step": 25500 }, { "epoch": 7.03, "learning_rate": 2.719010158099748e-06, "loss": 0.7928, "step": 25550 }, { "epoch": 7.04, "learning_rate": 2.680821813182617e-06, "loss": 0.923, "step": 25600 }, { "epoch": 7.05, "learning_rate": 2.6426334682654857e-06, "loss": 0.7777, "step": 25650 }, { "epoch": 7.07, "learning_rate": 2.604445123348354e-06, "loss": 0.7951, "step": 25700 }, { "epoch": 7.08, "learning_rate": 2.5662567784312232e-06, "loss": 0.7645, "step": 25750 }, { "epoch": 7.09, "learning_rate": 2.5280684335140915e-06, "loss": 0.776, "step": 25800 }, { "epoch": 7.11, "learning_rate": 2.4898800885969603e-06, "loss": 0.9266, "step": 25850 }, { "epoch": 7.12, "learning_rate": 2.451691743679829e-06, "loss": 0.8739, "step": 25900 }, { "epoch": 7.14, "learning_rate": 2.413503398762698e-06, "loss": 0.7744, "step": 25950 }, { "epoch": 7.15, "learning_rate": 2.3753150538455665e-06, "loss": 0.9439, "step": 26000 }, { "epoch": 7.16, "learning_rate": 2.3371267089284353e-06, "loss": 0.7762, "step": 26050 }, { "epoch": 7.18, "learning_rate": 2.298938364011304e-06, "loss": 0.8767, "step": 26100 }, { "epoch": 7.19, "learning_rate": 2.2607500190941724e-06, "loss": 0.8231, "step": 26150 }, { "epoch": 7.2, "learning_rate": 2.2225616741770415e-06, "loss": 0.8696, "step": 26200 }, { "epoch": 7.22, "learning_rate": 2.1843733292599103e-06, "loss": 0.7699, "step": 26250 }, { "epoch": 7.23, "learning_rate": 2.1461849843427786e-06, "loss": 0.7744, "step": 26300 }, { "epoch": 7.24, "learning_rate": 2.1079966394256474e-06, "loss": 0.811, "step": 26350 }, { "epoch": 7.26, "learning_rate": 2.069808294508516e-06, "loss": 0.7884, "step": 26400 }, { "epoch": 7.27, "learning_rate": 2.031619949591385e-06, "loss": 0.7898, "step": 26450 }, { "epoch": 7.29, "learning_rate": 1.9934316046742536e-06, "loss": 0.7554, "step": 26500 }, { "epoch": 7.3, "learning_rate": 1.9552432597571224e-06, "loss": 0.9014, "step": 26550 }, { "epoch": 7.31, "learning_rate": 1.917054914839991e-06, "loss": 0.8138, "step": 26600 }, { "epoch": 7.33, "learning_rate": 1.8788665699228598e-06, "loss": 0.7727, "step": 26650 }, { "epoch": 7.34, "learning_rate": 1.8406782250057284e-06, "loss": 0.8096, "step": 26700 }, { "epoch": 7.35, "learning_rate": 1.8024898800885971e-06, "loss": 0.8095, "step": 26750 }, { "epoch": 7.37, "learning_rate": 1.7643015351714657e-06, "loss": 0.7639, "step": 26800 }, { "epoch": 7.38, "learning_rate": 1.7261131902543346e-06, "loss": 0.7911, "step": 26850 }, { "epoch": 7.4, "learning_rate": 1.6879248453372032e-06, "loss": 0.7851, "step": 26900 }, { "epoch": 7.41, "learning_rate": 1.649736500420072e-06, "loss": 0.7824, "step": 26950 }, { "epoch": 7.42, "learning_rate": 1.6115481555029405e-06, "loss": 0.7824, "step": 27000 }, { "epoch": 7.44, "learning_rate": 1.5733598105858094e-06, "loss": 0.935, "step": 27050 }, { "epoch": 7.45, "learning_rate": 1.5351714656686782e-06, "loss": 1.0156, "step": 27100 }, { "epoch": 7.46, "learning_rate": 1.4969831207515467e-06, "loss": 0.922, "step": 27150 }, { "epoch": 7.48, "learning_rate": 1.4587947758344155e-06, "loss": 0.7791, "step": 27200 }, { "epoch": 7.49, "learning_rate": 1.4206064309172842e-06, "loss": 0.8007, "step": 27250 }, { "epoch": 7.51, "learning_rate": 1.382418086000153e-06, "loss": 0.7997, "step": 27300 }, { "epoch": 7.52, "learning_rate": 1.3442297410830215e-06, "loss": 0.8075, "step": 27350 }, { "epoch": 7.53, "learning_rate": 1.3060413961658902e-06, "loss": 0.7847, "step": 27400 }, { "epoch": 7.55, "learning_rate": 1.2678530512487588e-06, "loss": 0.765, "step": 27450 }, { "epoch": 7.56, "learning_rate": 1.2296647063316277e-06, "loss": 0.7724, "step": 27500 }, { "epoch": 7.56, "eval_loss": 0.5088235139846802, "eval_runtime": 7.6951, "eval_samples_per_second": 184.533, "eval_steps_per_second": 11.566, "eval_wer": 0.35690296814014705, "step": 27500 }, { "epoch": 7.57, "learning_rate": 1.1914763614144965e-06, "loss": 0.7736, "step": 27550 }, { "epoch": 7.59, "learning_rate": 1.153288016497365e-06, "loss": 0.7724, "step": 27600 }, { "epoch": 7.6, "learning_rate": 1.1150996715802338e-06, "loss": 0.7754, "step": 27650 }, { "epoch": 7.62, "learning_rate": 1.0769113266631025e-06, "loss": 0.7786, "step": 27700 }, { "epoch": 7.63, "learning_rate": 1.0387229817459713e-06, "loss": 0.7843, "step": 27750 }, { "epoch": 7.64, "learning_rate": 1.0005346368288398e-06, "loss": 0.8079, "step": 27800 }, { "epoch": 7.66, "learning_rate": 9.623462919117088e-07, "loss": 0.7721, "step": 27850 }, { "epoch": 7.67, "learning_rate": 9.241579469945773e-07, "loss": 0.7931, "step": 27900 }, { "epoch": 7.68, "learning_rate": 8.85969602077446e-07, "loss": 0.7885, "step": 27950 }, { "epoch": 7.7, "learning_rate": 8.477812571603147e-07, "loss": 0.7608, "step": 28000 }, { "epoch": 7.71, "learning_rate": 8.095929122431835e-07, "loss": 0.9371, "step": 28050 }, { "epoch": 7.73, "learning_rate": 7.714045673260522e-07, "loss": 0.7869, "step": 28100 }, { "epoch": 7.74, "learning_rate": 7.332162224089208e-07, "loss": 0.7832, "step": 28150 }, { "epoch": 7.75, "learning_rate": 6.957916443901322e-07, "loss": 0.8951, "step": 28200 }, { "epoch": 7.77, "learning_rate": 6.576032994730008e-07, "loss": 0.7774, "step": 28250 }, { "epoch": 7.78, "learning_rate": 6.194149545558696e-07, "loss": 0.8053, "step": 28300 }, { "epoch": 7.79, "learning_rate": 5.812266096387383e-07, "loss": 0.7782, "step": 28350 }, { "epoch": 7.81, "learning_rate": 5.43038264721607e-07, "loss": 0.892, "step": 28400 }, { "epoch": 7.82, "learning_rate": 5.048499198044757e-07, "loss": 0.7985, "step": 28450 }, { "epoch": 7.84, "learning_rate": 4.666615748873445e-07, "loss": 0.7798, "step": 28500 }, { "epoch": 7.85, "learning_rate": 4.2847322997021307e-07, "loss": 0.7741, "step": 28550 }, { "epoch": 7.86, "learning_rate": 3.902848850530818e-07, "loss": 0.7992, "step": 28600 }, { "epoch": 7.88, "learning_rate": 3.520965401359505e-07, "loss": 0.7634, "step": 28650 }, { "epoch": 7.89, "learning_rate": 3.139081952188192e-07, "loss": 0.9145, "step": 28700 }, { "epoch": 7.9, "learning_rate": 2.7571985030168796e-07, "loss": 0.7934, "step": 28750 }, { "epoch": 7.92, "learning_rate": 2.3753150538455665e-07, "loss": 0.8851, "step": 28800 }, { "epoch": 7.93, "learning_rate": 1.9934316046742535e-07, "loss": 0.8048, "step": 28850 }, { "epoch": 7.95, "learning_rate": 1.6115481555029407e-07, "loss": 0.8224, "step": 28900 }, { "epoch": 7.96, "learning_rate": 1.2296647063316277e-07, "loss": 0.7634, "step": 28950 }, { "epoch": 7.97, "learning_rate": 8.477812571603146e-08, "loss": 0.7944, "step": 29000 }, { "epoch": 7.99, "learning_rate": 4.658978079890018e-08, "loss": 0.7537, "step": 29050 }, { "epoch": 8.0, "step": 29096, "total_flos": 3.5467013286602473e+20, "train_loss": 1.6638745573283487, "train_runtime": 15624.6123, "train_samples_per_second": 74.468, "train_steps_per_second": 1.862 } ], "logging_steps": 50, "max_steps": 29096, "num_train_epochs": 8, "save_steps": 500, "total_flos": 3.5467013286602473e+20, "trial_name": null, "trial_params": null }