{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99963086009598, "global_step": 33850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 3.6375e-06, "loss": 124.9665, "step": 100 }, { "epoch": 0.3, "learning_rate": 7.3875e-06, "loss": 92.673, "step": 200 }, { "epoch": 0.44, "learning_rate": 1.1099999999999999e-05, "loss": 74.8932, "step": 300 }, { "epoch": 0.59, "learning_rate": 1.485e-05, "loss": 68.0432, "step": 400 }, { "epoch": 0.74, "learning_rate": 1.8599999999999998e-05, "loss": 60.2112, "step": 500 }, { "epoch": 0.74, "eval_loss": 64.81886291503906, "eval_runtime": 129.9516, "eval_samples_per_second": 15.552, "eval_steps_per_second": 1.947, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.89, "learning_rate": 2.2349999999999998e-05, "loss": 51.3096, "step": 600 }, { "epoch": 1.03, "learning_rate": 2.6099999999999997e-05, "loss": 39.1106, "step": 700 }, { "epoch": 1.18, "learning_rate": 2.985e-05, "loss": 26.6843, "step": 800 }, { "epoch": 1.33, "learning_rate": 3.36e-05, "loss": 14.7864, "step": 900 }, { "epoch": 1.48, "learning_rate": 3.735e-05, "loss": 8.1128, "step": 1000 }, { "epoch": 1.48, "eval_loss": 6.899676322937012, "eval_runtime": 115.5788, "eval_samples_per_second": 17.486, "eval_steps_per_second": 2.189, "eval_wer": 1.0, "step": 1000 }, { "epoch": 1.62, "learning_rate": 4.11e-05, "loss": 6.6068, "step": 1100 }, { "epoch": 1.77, "learning_rate": 4.484999999999999e-05, "loss": 6.23, "step": 1200 }, { "epoch": 1.92, "learning_rate": 4.8599999999999995e-05, "loss": 6.0972, "step": 1300 }, { "epoch": 2.07, "learning_rate": 5.234999999999999e-05, "loss": 6.0595, "step": 1400 }, { "epoch": 2.22, "learning_rate": 5.6099999999999995e-05, "loss": 6.0492, "step": 1500 }, { "epoch": 2.22, "eval_loss": 5.967654228210449, "eval_runtime": 115.432, "eval_samples_per_second": 17.508, "eval_steps_per_second": 2.192, "eval_wer": 1.949529935675408, "step": 1500 }, { "epoch": 2.36, "learning_rate": 5.985e-05, "loss": 6.0266, "step": 1600 }, { "epoch": 2.51, "learning_rate": 6.359999999999999e-05, "loss": 5.9902, "step": 1700 }, { "epoch": 2.66, "learning_rate": 6.735e-05, "loss": 5.9762, "step": 1800 }, { "epoch": 2.81, "learning_rate": 7.11e-05, "loss": 5.9491, "step": 1900 }, { "epoch": 2.95, "learning_rate": 7.484999999999999e-05, "loss": 5.9326, "step": 2000 }, { "epoch": 2.95, "eval_loss": 5.884542942047119, "eval_runtime": 114.597, "eval_samples_per_second": 17.636, "eval_steps_per_second": 2.208, "eval_wer": 1.409203364670955, "step": 2000 }, { "epoch": 3.1, "learning_rate": 7.477394034536891e-05, "loss": 5.9356, "step": 2100 }, { "epoch": 3.25, "learning_rate": 7.453846153846153e-05, "loss": 5.8889, "step": 2200 }, { "epoch": 3.4, "learning_rate": 7.430298273155415e-05, "loss": 5.899, "step": 2300 }, { "epoch": 3.54, "learning_rate": 7.406750392464678e-05, "loss": 5.8824, "step": 2400 }, { "epoch": 3.69, "learning_rate": 7.38320251177394e-05, "loss": 5.8763, "step": 2500 }, { "epoch": 3.69, "eval_loss": 5.846009731292725, "eval_runtime": 117.5393, "eval_samples_per_second": 17.194, "eval_steps_per_second": 2.152, "eval_wer": 1.6125680356259278, "step": 2500 }, { "epoch": 3.84, "learning_rate": 7.359654631083201e-05, "loss": 5.875, "step": 2600 }, { "epoch": 3.99, "learning_rate": 7.336106750392464e-05, "loss": 5.8671, "step": 2700 }, { "epoch": 4.14, "learning_rate": 7.312558869701726e-05, "loss": 5.8591, "step": 2800 }, { "epoch": 4.28, "learning_rate": 7.289010989010989e-05, "loss": 5.8226, "step": 2900 }, { "epoch": 4.43, "learning_rate": 7.265463108320251e-05, "loss": 5.7888, "step": 3000 }, { "epoch": 4.43, "eval_loss": 5.75445032119751, "eval_runtime": 114.1832, "eval_samples_per_second": 17.7, "eval_steps_per_second": 2.216, "eval_wer": 2.2033646709549726, "step": 3000 }, { "epoch": 4.58, "learning_rate": 7.241915227629513e-05, "loss": 5.8041, "step": 3100 }, { "epoch": 4.73, "learning_rate": 7.218367346938774e-05, "loss": 5.8013, "step": 3200 }, { "epoch": 4.87, "learning_rate": 7.194819466248037e-05, "loss": 5.7947, "step": 3300 }, { "epoch": 5.02, "learning_rate": 7.171271585557299e-05, "loss": 5.7802, "step": 3400 }, { "epoch": 5.17, "learning_rate": 7.147723704866562e-05, "loss": 5.735, "step": 3500 }, { "epoch": 5.17, "eval_loss": 5.677657604217529, "eval_runtime": 115.6516, "eval_samples_per_second": 17.475, "eval_steps_per_second": 2.188, "eval_wer": 2.334982681840673, "step": 3500 }, { "epoch": 5.32, "learning_rate": 7.124175824175823e-05, "loss": 5.7198, "step": 3600 }, { "epoch": 5.47, "learning_rate": 7.100627943485086e-05, "loss": 5.7092, "step": 3700 }, { "epoch": 5.61, "learning_rate": 7.077080062794347e-05, "loss": 5.6613, "step": 3800 }, { "epoch": 5.76, "learning_rate": 7.05353218210361e-05, "loss": 5.6579, "step": 3900 }, { "epoch": 5.91, "learning_rate": 7.029984301412873e-05, "loss": 5.6861, "step": 4000 }, { "epoch": 5.91, "eval_loss": 5.517865180969238, "eval_runtime": 115.3653, "eval_samples_per_second": 17.518, "eval_steps_per_second": 2.193, "eval_wer": 2.223156853043048, "step": 4000 }, { "epoch": 6.06, "learning_rate": 7.006436420722135e-05, "loss": 5.6024, "step": 4100 }, { "epoch": 6.2, "learning_rate": 6.982888540031396e-05, "loss": 5.5497, "step": 4200 }, { "epoch": 6.35, "learning_rate": 6.959340659340659e-05, "loss": 5.5257, "step": 4300 }, { "epoch": 6.5, "learning_rate": 6.93579277864992e-05, "loss": 5.4534, "step": 4400 }, { "epoch": 6.65, "learning_rate": 6.912244897959182e-05, "loss": 5.381, "step": 4500 }, { "epoch": 6.65, "eval_loss": 5.142032146453857, "eval_runtime": 117.6237, "eval_samples_per_second": 17.182, "eval_steps_per_second": 2.151, "eval_wer": 2.18159327065809, "step": 4500 }, { "epoch": 6.79, "learning_rate": 6.888697017268445e-05, "loss": 5.3409, "step": 4600 }, { "epoch": 6.94, "learning_rate": 6.865149136577708e-05, "loss": 5.1283, "step": 4700 }, { "epoch": 7.09, "learning_rate": 6.841601255886969e-05, "loss": 4.8788, "step": 4800 }, { "epoch": 7.24, "learning_rate": 6.818053375196232e-05, "loss": 4.7235, "step": 4900 }, { "epoch": 7.39, "learning_rate": 6.794505494505494e-05, "loss": 4.625, "step": 5000 }, { "epoch": 7.39, "eval_loss": 3.9019837379455566, "eval_runtime": 116.0971, "eval_samples_per_second": 17.408, "eval_steps_per_second": 2.179, "eval_wer": 2.0722414646214746, "step": 5000 }, { "epoch": 7.53, "learning_rate": 6.770957613814756e-05, "loss": 4.5404, "step": 5100 }, { "epoch": 7.68, "learning_rate": 6.747409733124018e-05, "loss": 4.4307, "step": 5200 }, { "epoch": 7.83, "learning_rate": 6.723861852433281e-05, "loss": 4.3794, "step": 5300 }, { "epoch": 7.98, "learning_rate": 6.700313971742542e-05, "loss": 4.2786, "step": 5400 }, { "epoch": 8.12, "learning_rate": 6.676766091051805e-05, "loss": 4.214, "step": 5500 }, { "epoch": 8.12, "eval_loss": 3.339416027069092, "eval_runtime": 116.9868, "eval_samples_per_second": 17.275, "eval_steps_per_second": 2.163, "eval_wer": 2.1429985155863434, "step": 5500 }, { "epoch": 8.27, "learning_rate": 6.653218210361068e-05, "loss": 4.1206, "step": 5600 }, { "epoch": 8.42, "learning_rate": 6.62967032967033e-05, "loss": 4.081, "step": 5700 }, { "epoch": 8.57, "learning_rate": 6.606122448979591e-05, "loss": 4.0059, "step": 5800 }, { "epoch": 8.71, "learning_rate": 6.582574568288854e-05, "loss": 3.9251, "step": 5900 }, { "epoch": 8.86, "learning_rate": 6.559262166405023e-05, "loss": 3.8992, "step": 6000 }, { "epoch": 8.86, "eval_loss": 2.9084665775299072, "eval_runtime": 119.0907, "eval_samples_per_second": 16.97, "eval_steps_per_second": 2.124, "eval_wer": 2.153389411182583, "step": 6000 }, { "epoch": 9.01, "learning_rate": 6.535714285714285e-05, "loss": 3.8494, "step": 6100 }, { "epoch": 9.16, "learning_rate": 6.512166405023547e-05, "loss": 3.7923, "step": 6200 }, { "epoch": 9.31, "learning_rate": 6.48861852433281e-05, "loss": 3.7416, "step": 6300 }, { "epoch": 9.45, "learning_rate": 6.465070643642071e-05, "loss": 3.7095, "step": 6400 }, { "epoch": 9.6, "learning_rate": 6.441522762951334e-05, "loss": 3.6481, "step": 6500 }, { "epoch": 9.6, "eval_loss": 2.620758295059204, "eval_runtime": 115.2407, "eval_samples_per_second": 17.537, "eval_steps_per_second": 2.195, "eval_wer": 2.3537852548243445, "step": 6500 }, { "epoch": 9.75, "learning_rate": 6.417974882260596e-05, "loss": 3.6196, "step": 6600 }, { "epoch": 9.9, "learning_rate": 6.394427001569859e-05, "loss": 3.5941, "step": 6700 }, { "epoch": 10.04, "learning_rate": 6.37087912087912e-05, "loss": 3.5608, "step": 6800 }, { "epoch": 10.19, "learning_rate": 6.347331240188383e-05, "loss": 3.5296, "step": 6900 }, { "epoch": 10.34, "learning_rate": 6.324018838304552e-05, "loss": 3.4658, "step": 7000 }, { "epoch": 10.34, "eval_loss": 2.3172152042388916, "eval_runtime": 114.5436, "eval_samples_per_second": 17.644, "eval_steps_per_second": 2.209, "eval_wer": 2.227115289460663, "step": 7000 }, { "epoch": 10.49, "learning_rate": 6.300470957613814e-05, "loss": 3.3977, "step": 7100 }, { "epoch": 10.63, "learning_rate": 6.276923076923076e-05, "loss": 3.3987, "step": 7200 }, { "epoch": 10.78, "learning_rate": 6.253375196232339e-05, "loss": 3.3587, "step": 7300 }, { "epoch": 10.93, "learning_rate": 6.2298273155416e-05, "loss": 3.2796, "step": 7400 }, { "epoch": 11.08, "learning_rate": 6.206279434850863e-05, "loss": 3.257, "step": 7500 }, { "epoch": 11.08, "eval_loss": 2.0916049480438232, "eval_runtime": 113.9408, "eval_samples_per_second": 17.737, "eval_steps_per_second": 2.22, "eval_wer": 2.1350816427511132, "step": 7500 }, { "epoch": 11.23, "learning_rate": 6.182731554160125e-05, "loss": 3.2476, "step": 7600 }, { "epoch": 11.37, "learning_rate": 6.159183673469388e-05, "loss": 3.2463, "step": 7700 }, { "epoch": 11.52, "learning_rate": 6.135635792778649e-05, "loss": 3.2323, "step": 7800 }, { "epoch": 11.67, "learning_rate": 6.112087912087912e-05, "loss": 3.1674, "step": 7900 }, { "epoch": 11.82, "learning_rate": 6.088540031397174e-05, "loss": 3.1294, "step": 8000 }, { "epoch": 11.82, "eval_loss": 1.895378828048706, "eval_runtime": 115.1394, "eval_samples_per_second": 17.553, "eval_steps_per_second": 2.197, "eval_wer": 2.2132607619990106, "step": 8000 }, { "epoch": 11.96, "learning_rate": 6.0649921507064355e-05, "loss": 3.1262, "step": 8100 }, { "epoch": 12.11, "learning_rate": 6.041444270015698e-05, "loss": 3.0377, "step": 8200 }, { "epoch": 12.26, "learning_rate": 6.01789638932496e-05, "loss": 3.0306, "step": 8300 }, { "epoch": 12.41, "learning_rate": 5.994348508634223e-05, "loss": 3.0425, "step": 8400 }, { "epoch": 12.56, "learning_rate": 5.9710361067503915e-05, "loss": 3.0266, "step": 8500 }, { "epoch": 12.56, "eval_loss": 1.76727294921875, "eval_runtime": 114.3494, "eval_samples_per_second": 17.674, "eval_steps_per_second": 2.213, "eval_wer": 2.0895596239485403, "step": 8500 }, { "epoch": 12.7, "learning_rate": 5.9474882260596537e-05, "loss": 3.0398, "step": 8600 }, { "epoch": 12.85, "learning_rate": 5.9239403453689165e-05, "loss": 2.9985, "step": 8700 }, { "epoch": 13.0, "learning_rate": 5.900392464678179e-05, "loss": 2.9969, "step": 8800 }, { "epoch": 13.15, "learning_rate": 5.876844583987441e-05, "loss": 2.9648, "step": 8900 }, { "epoch": 13.29, "learning_rate": 5.8532967032967024e-05, "loss": 2.9451, "step": 9000 }, { "epoch": 13.29, "eval_loss": 1.665855884552002, "eval_runtime": 116.4877, "eval_samples_per_second": 17.349, "eval_steps_per_second": 2.172, "eval_wer": 2.1380504700643246, "step": 9000 }, { "epoch": 13.44, "learning_rate": 5.8297488226059645e-05, "loss": 2.9573, "step": 9100 }, { "epoch": 13.59, "learning_rate": 5.8062009419152274e-05, "loss": 2.8819, "step": 9200 }, { "epoch": 13.74, "learning_rate": 5.7826530612244896e-05, "loss": 2.8901, "step": 9300 }, { "epoch": 13.88, "learning_rate": 5.759105180533752e-05, "loss": 2.8492, "step": 9400 }, { "epoch": 14.03, "learning_rate": 5.735557299843013e-05, "loss": 2.8802, "step": 9500 }, { "epoch": 14.03, "eval_loss": 1.5637215375900269, "eval_runtime": 115.1622, "eval_samples_per_second": 17.549, "eval_steps_per_second": 2.197, "eval_wer": 2.1969322117763483, "step": 9500 }, { "epoch": 14.18, "learning_rate": 5.7120094191522754e-05, "loss": 2.8346, "step": 9600 }, { "epoch": 14.33, "learning_rate": 5.6884615384615376e-05, "loss": 2.8355, "step": 9700 }, { "epoch": 14.48, "learning_rate": 5.6649136577708005e-05, "loss": 2.8124, "step": 9800 }, { "epoch": 14.62, "learning_rate": 5.6413657770800626e-05, "loss": 2.7879, "step": 9900 }, { "epoch": 14.77, "learning_rate": 5.617817896389324e-05, "loss": 2.78, "step": 10000 }, { "epoch": 14.77, "eval_loss": 1.4921427965164185, "eval_runtime": 115.1, "eval_samples_per_second": 17.559, "eval_steps_per_second": 2.198, "eval_wer": 2.2335477486392876, "step": 10000 }, { "epoch": 14.92, "learning_rate": 5.594270015698586e-05, "loss": 2.775, "step": 10100 }, { "epoch": 15.07, "learning_rate": 5.5707221350078485e-05, "loss": 2.7478, "step": 10200 }, { "epoch": 15.21, "learning_rate": 5.5471742543171114e-05, "loss": 2.7224, "step": 10300 }, { "epoch": 15.36, "learning_rate": 5.5236263736263735e-05, "loss": 2.7506, "step": 10400 }, { "epoch": 15.51, "learning_rate": 5.500078492935635e-05, "loss": 2.7049, "step": 10500 }, { "epoch": 15.51, "eval_loss": 1.413183569908142, "eval_runtime": 114.2743, "eval_samples_per_second": 17.686, "eval_steps_per_second": 2.214, "eval_wer": 2.221672439386442, "step": 10500 }, { "epoch": 15.66, "learning_rate": 5.476766091051805e-05, "loss": 2.7145, "step": 10600 }, { "epoch": 15.8, "learning_rate": 5.453218210361067e-05, "loss": 2.6892, "step": 10700 }, { "epoch": 15.95, "learning_rate": 5.4296703296703295e-05, "loss": 2.69, "step": 10800 }, { "epoch": 16.1, "learning_rate": 5.406122448979591e-05, "loss": 2.623, "step": 10900 }, { "epoch": 16.25, "learning_rate": 5.382574568288853e-05, "loss": 2.6768, "step": 11000 }, { "epoch": 16.25, "eval_loss": 1.3666878938674927, "eval_runtime": 119.4402, "eval_samples_per_second": 16.921, "eval_steps_per_second": 2.118, "eval_wer": 2.223156853043048, "step": 11000 }, { "epoch": 16.4, "learning_rate": 5.359262166405023e-05, "loss": 2.628, "step": 11100 }, { "epoch": 16.54, "learning_rate": 5.3357142857142854e-05, "loss": 2.6163, "step": 11200 }, { "epoch": 16.69, "learning_rate": 5.312166405023547e-05, "loss": 2.6193, "step": 11300 }, { "epoch": 16.84, "learning_rate": 5.28861852433281e-05, "loss": 2.6531, "step": 11400 }, { "epoch": 16.99, "learning_rate": 5.265070643642072e-05, "loss": 2.6358, "step": 11500 }, { "epoch": 16.99, "eval_loss": 1.311090111732483, "eval_runtime": 116.2157, "eval_samples_per_second": 17.39, "eval_steps_per_second": 2.177, "eval_wer": 2.128649183572489, "step": 11500 }, { "epoch": 17.13, "learning_rate": 5.241522762951334e-05, "loss": 2.5748, "step": 11600 }, { "epoch": 17.28, "learning_rate": 5.217974882260596e-05, "loss": 2.6287, "step": 11700 }, { "epoch": 17.43, "learning_rate": 5.194427001569858e-05, "loss": 2.5583, "step": 11800 }, { "epoch": 17.58, "learning_rate": 5.17087912087912e-05, "loss": 2.5547, "step": 11900 }, { "epoch": 17.72, "learning_rate": 5.147331240188383e-05, "loss": 2.5802, "step": 12000 }, { "epoch": 17.72, "eval_loss": 1.2678567171096802, "eval_runtime": 116.076, "eval_samples_per_second": 17.411, "eval_steps_per_second": 2.18, "eval_wer": 2.1429985155863434, "step": 12000 }, { "epoch": 17.87, "learning_rate": 5.123783359497645e-05, "loss": 2.557, "step": 12100 }, { "epoch": 18.02, "learning_rate": 5.100235478806907e-05, "loss": 2.5771, "step": 12200 }, { "epoch": 18.17, "learning_rate": 5.076687598116169e-05, "loss": 2.5393, "step": 12300 }, { "epoch": 18.32, "learning_rate": 5.053375196232339e-05, "loss": 2.5031, "step": 12400 }, { "epoch": 18.46, "learning_rate": 5.029827315541601e-05, "loss": 2.5012, "step": 12500 }, { "epoch": 18.46, "eval_loss": 1.2365446090698242, "eval_runtime": 116.0118, "eval_samples_per_second": 17.421, "eval_steps_per_second": 2.181, "eval_wer": 2.115289460663038, "step": 12500 }, { "epoch": 18.61, "learning_rate": 5.006279434850863e-05, "loss": 2.54, "step": 12600 }, { "epoch": 18.76, "learning_rate": 4.9827315541601246e-05, "loss": 2.5072, "step": 12700 }, { "epoch": 18.91, "learning_rate": 4.9591836734693875e-05, "loss": 2.4951, "step": 12800 }, { "epoch": 19.05, "learning_rate": 4.9356357927786497e-05, "loss": 2.4789, "step": 12900 }, { "epoch": 19.2, "learning_rate": 4.912087912087912e-05, "loss": 2.458, "step": 13000 }, { "epoch": 19.2, "eval_loss": 1.2117862701416016, "eval_runtime": 116.2579, "eval_samples_per_second": 17.384, "eval_steps_per_second": 2.176, "eval_wer": 2.1573478476001977, "step": 13000 }, { "epoch": 19.35, "learning_rate": 4.888540031397174e-05, "loss": 2.4616, "step": 13100 }, { "epoch": 19.5, "learning_rate": 4.8649921507064355e-05, "loss": 2.4739, "step": 13200 }, { "epoch": 19.65, "learning_rate": 4.8414442700156984e-05, "loss": 2.4867, "step": 13300 }, { "epoch": 19.79, "learning_rate": 4.8178963893249605e-05, "loss": 2.4568, "step": 13400 }, { "epoch": 19.94, "learning_rate": 4.794348508634223e-05, "loss": 2.4433, "step": 13500 }, { "epoch": 19.94, "eval_loss": 1.1991767883300781, "eval_runtime": 114.5641, "eval_samples_per_second": 17.641, "eval_steps_per_second": 2.208, "eval_wer": 2.1335972290945078, "step": 13500 }, { "epoch": 20.09, "learning_rate": 4.770800627943485e-05, "loss": 2.4532, "step": 13600 }, { "epoch": 20.24, "learning_rate": 4.7472527472527464e-05, "loss": 2.3913, "step": 13700 }, { "epoch": 20.38, "learning_rate": 4.7237048665620086e-05, "loss": 2.421, "step": 13800 }, { "epoch": 20.53, "learning_rate": 4.7001569858712714e-05, "loss": 2.4526, "step": 13900 }, { "epoch": 20.68, "learning_rate": 4.6766091051805336e-05, "loss": 2.438, "step": 14000 }, { "epoch": 20.68, "eval_loss": 1.180332064628601, "eval_runtime": 116.5012, "eval_samples_per_second": 17.347, "eval_steps_per_second": 2.172, "eval_wer": 2.1509153884215735, "step": 14000 }, { "epoch": 20.83, "learning_rate": 4.653061224489796e-05, "loss": 2.4034, "step": 14100 }, { "epoch": 20.97, "learning_rate": 4.629513343799057e-05, "loss": 2.4306, "step": 14200 }, { "epoch": 21.12, "learning_rate": 4.6059654631083195e-05, "loss": 2.4145, "step": 14300 }, { "epoch": 21.27, "learning_rate": 4.582417582417582e-05, "loss": 2.4677, "step": 14400 }, { "epoch": 21.42, "learning_rate": 4.5588697017268445e-05, "loss": 2.418, "step": 14500 }, { "epoch": 21.42, "eval_loss": 1.1601430177688599, "eval_runtime": 114.5652, "eval_samples_per_second": 17.641, "eval_steps_per_second": 2.208, "eval_wer": 2.1232063334982683, "step": 14500 }, { "epoch": 21.57, "learning_rate": 4.535321821036107e-05, "loss": 2.3967, "step": 14600 }, { "epoch": 21.71, "learning_rate": 4.511773940345368e-05, "loss": 2.3939, "step": 14700 }, { "epoch": 21.86, "learning_rate": 4.4882260596546304e-05, "loss": 2.3925, "step": 14800 }, { "epoch": 22.01, "learning_rate": 4.4646781789638925e-05, "loss": 2.3596, "step": 14900 }, { "epoch": 22.16, "learning_rate": 4.4411302982731554e-05, "loss": 2.3322, "step": 15000 }, { "epoch": 22.16, "eval_loss": 1.1417704820632935, "eval_runtime": 116.2111, "eval_samples_per_second": 17.391, "eval_steps_per_second": 2.177, "eval_wer": 2.1929737753587335, "step": 15000 }, { "epoch": 22.3, "learning_rate": 4.4175824175824176e-05, "loss": 2.3821, "step": 15100 }, { "epoch": 22.45, "learning_rate": 4.394034536891679e-05, "loss": 2.3435, "step": 15200 }, { "epoch": 22.6, "learning_rate": 4.370486656200941e-05, "loss": 2.3542, "step": 15300 }, { "epoch": 22.75, "learning_rate": 4.3469387755102034e-05, "loss": 2.3469, "step": 15400 }, { "epoch": 22.89, "learning_rate": 4.323390894819466e-05, "loss": 2.3387, "step": 15500 }, { "epoch": 22.89, "eval_loss": 1.1172302961349487, "eval_runtime": 114.3169, "eval_samples_per_second": 17.679, "eval_steps_per_second": 2.213, "eval_wer": 2.2464126669965365, "step": 15500 }, { "epoch": 23.04, "learning_rate": 4.2998430141287285e-05, "loss": 2.3688, "step": 15600 }, { "epoch": 23.19, "learning_rate": 4.27629513343799e-05, "loss": 2.3344, "step": 15700 }, { "epoch": 23.34, "learning_rate": 4.252747252747252e-05, "loss": 2.3245, "step": 15800 }, { "epoch": 23.49, "learning_rate": 4.229199372056514e-05, "loss": 2.3523, "step": 15900 }, { "epoch": 23.63, "learning_rate": 4.205651491365777e-05, "loss": 2.3349, "step": 16000 }, { "epoch": 23.63, "eval_loss": 1.1144375801086426, "eval_runtime": 116.2412, "eval_samples_per_second": 17.386, "eval_steps_per_second": 2.177, "eval_wer": 2.185551707075705, "step": 16000 }, { "epoch": 23.78, "learning_rate": 4.1821036106750393e-05, "loss": 2.2847, "step": 16100 }, { "epoch": 23.93, "learning_rate": 4.158555729984301e-05, "loss": 2.3303, "step": 16200 }, { "epoch": 24.08, "learning_rate": 4.135007849293563e-05, "loss": 2.2994, "step": 16300 }, { "epoch": 24.22, "learning_rate": 4.111459968602825e-05, "loss": 2.2887, "step": 16400 }, { "epoch": 24.37, "learning_rate": 4.0879120879120874e-05, "loss": 2.291, "step": 16500 }, { "epoch": 24.37, "eval_loss": 1.1018128395080566, "eval_runtime": 114.9042, "eval_samples_per_second": 17.589, "eval_steps_per_second": 2.202, "eval_wer": 2.1929737753587335, "step": 16500 }, { "epoch": 24.52, "learning_rate": 4.06436420722135e-05, "loss": 2.2888, "step": 16600 }, { "epoch": 24.67, "learning_rate": 4.040816326530612e-05, "loss": 2.2724, "step": 16700 }, { "epoch": 24.82, "learning_rate": 4.017268445839874e-05, "loss": 2.2922, "step": 16800 }, { "epoch": 24.96, "learning_rate": 3.993720565149136e-05, "loss": 2.2934, "step": 16900 }, { "epoch": 25.11, "learning_rate": 3.970172684458398e-05, "loss": 2.2766, "step": 17000 }, { "epoch": 25.11, "eval_loss": 1.0882744789123535, "eval_runtime": 117.2941, "eval_samples_per_second": 17.23, "eval_steps_per_second": 2.157, "eval_wer": 2.1761504205838693, "step": 17000 }, { "epoch": 25.26, "learning_rate": 3.946624803767661e-05, "loss": 2.2656, "step": 17100 }, { "epoch": 25.41, "learning_rate": 3.9230769230769226e-05, "loss": 2.2929, "step": 17200 }, { "epoch": 25.55, "learning_rate": 3.899529042386185e-05, "loss": 2.2513, "step": 17300 }, { "epoch": 25.7, "learning_rate": 3.875981161695447e-05, "loss": 2.2603, "step": 17400 }, { "epoch": 25.85, "learning_rate": 3.852433281004709e-05, "loss": 2.2534, "step": 17500 }, { "epoch": 25.85, "eval_loss": 1.0743526220321655, "eval_runtime": 118.2043, "eval_samples_per_second": 17.098, "eval_steps_per_second": 2.14, "eval_wer": 2.1875309252845128, "step": 17500 }, { "epoch": 26.0, "learning_rate": 3.8288854003139713e-05, "loss": 2.2716, "step": 17600 }, { "epoch": 26.14, "learning_rate": 3.8053375196232335e-05, "loss": 2.2486, "step": 17700 }, { "epoch": 26.29, "learning_rate": 3.781789638932496e-05, "loss": 2.2068, "step": 17800 }, { "epoch": 26.44, "learning_rate": 3.758241758241758e-05, "loss": 2.2431, "step": 17900 }, { "epoch": 26.59, "learning_rate": 3.73469387755102e-05, "loss": 2.2393, "step": 18000 }, { "epoch": 26.59, "eval_loss": 1.0561192035675049, "eval_runtime": 116.8996, "eval_samples_per_second": 17.288, "eval_steps_per_second": 2.164, "eval_wer": 2.1845620979713014, "step": 18000 }, { "epoch": 26.74, "learning_rate": 3.711145996860282e-05, "loss": 2.1944, "step": 18100 }, { "epoch": 26.88, "learning_rate": 3.6875981161695444e-05, "loss": 2.2359, "step": 18200 }, { "epoch": 27.03, "learning_rate": 3.664285714285714e-05, "loss": 2.2097, "step": 18300 }, { "epoch": 27.18, "learning_rate": 3.640737833594976e-05, "loss": 2.1431, "step": 18400 }, { "epoch": 27.33, "learning_rate": 3.617189952904238e-05, "loss": 2.2085, "step": 18500 }, { "epoch": 27.33, "eval_loss": 1.0465816259384155, "eval_runtime": 115.87, "eval_samples_per_second": 17.442, "eval_steps_per_second": 2.183, "eval_wer": 2.1444829292429493, "step": 18500 }, { "epoch": 27.47, "learning_rate": 3.5936420722135003e-05, "loss": 2.2204, "step": 18600 }, { "epoch": 27.62, "learning_rate": 3.5700941915227625e-05, "loss": 2.242, "step": 18700 }, { "epoch": 27.77, "learning_rate": 3.546546310832025e-05, "loss": 2.1699, "step": 18800 }, { "epoch": 27.92, "learning_rate": 3.522998430141287e-05, "loss": 2.2152, "step": 18900 }, { "epoch": 28.06, "learning_rate": 3.499450549450549e-05, "loss": 2.1966, "step": 19000 }, { "epoch": 28.06, "eval_loss": 1.0382250547409058, "eval_runtime": 116.4655, "eval_samples_per_second": 17.353, "eval_steps_per_second": 2.172, "eval_wer": 2.1088570014844135, "step": 19000 }, { "epoch": 28.21, "learning_rate": 3.475902668759811e-05, "loss": 2.169, "step": 19100 }, { "epoch": 28.36, "learning_rate": 3.4523547880690734e-05, "loss": 2.1981, "step": 19200 }, { "epoch": 28.51, "learning_rate": 3.4288069073783356e-05, "loss": 2.1692, "step": 19300 }, { "epoch": 28.66, "learning_rate": 3.405259026687598e-05, "loss": 2.1931, "step": 19400 }, { "epoch": 28.8, "learning_rate": 3.38171114599686e-05, "loss": 2.1794, "step": 19500 }, { "epoch": 28.8, "eval_loss": 1.0263785123825073, "eval_runtime": 114.5988, "eval_samples_per_second": 17.635, "eval_steps_per_second": 2.208, "eval_wer": 1.9861454725383474, "step": 19500 }, { "epoch": 28.95, "learning_rate": 3.358163265306122e-05, "loss": 2.1638, "step": 19600 }, { "epoch": 29.1, "learning_rate": 3.334615384615384e-05, "loss": 2.1714, "step": 19700 }, { "epoch": 29.25, "learning_rate": 3.3110675039246465e-05, "loss": 2.1514, "step": 19800 }, { "epoch": 29.39, "learning_rate": 3.2875196232339087e-05, "loss": 2.1374, "step": 19900 }, { "epoch": 29.54, "learning_rate": 3.263971742543171e-05, "loss": 2.1423, "step": 20000 }, { "epoch": 29.54, "eval_loss": 1.0245550870895386, "eval_runtime": 116.8375, "eval_samples_per_second": 17.298, "eval_steps_per_second": 2.165, "eval_wer": 1.9678377041068777, "step": 20000 }, { "epoch": 29.69, "learning_rate": 3.240423861852433e-05, "loss": 2.1807, "step": 20100 }, { "epoch": 29.84, "learning_rate": 3.216875981161695e-05, "loss": 2.1545, "step": 20200 }, { "epoch": 29.98, "learning_rate": 3.1933281004709574e-05, "loss": 2.1404, "step": 20300 }, { "epoch": 30.13, "learning_rate": 3.1697802197802195e-05, "loss": 2.1089, "step": 20400 }, { "epoch": 30.28, "learning_rate": 3.146232339089482e-05, "loss": 2.1649, "step": 20500 }, { "epoch": 30.28, "eval_loss": 0.9981661438941956, "eval_runtime": 116.056, "eval_samples_per_second": 17.414, "eval_steps_per_second": 2.18, "eval_wer": 2.000494804552202, "step": 20500 }, { "epoch": 30.43, "learning_rate": 3.122684458398744e-05, "loss": 2.1425, "step": 20600 }, { "epoch": 30.58, "learning_rate": 3.099136577708006e-05, "loss": 2.1357, "step": 20700 }, { "epoch": 30.72, "learning_rate": 3.0758241758241755e-05, "loss": 2.1251, "step": 20800 }, { "epoch": 30.87, "learning_rate": 3.052276295133438e-05, "loss": 2.1256, "step": 20900 }, { "epoch": 31.02, "learning_rate": 3.0287284144427e-05, "loss": 2.143, "step": 21000 }, { "epoch": 31.02, "eval_loss": 0.9985482692718506, "eval_runtime": 116.0424, "eval_samples_per_second": 17.416, "eval_steps_per_second": 2.18, "eval_wer": 2.045027214250371, "step": 21000 }, { "epoch": 31.17, "learning_rate": 3.005180533751962e-05, "loss": 2.0744, "step": 21100 }, { "epoch": 31.31, "learning_rate": 2.9816326530612242e-05, "loss": 2.0831, "step": 21200 }, { "epoch": 31.46, "learning_rate": 2.9583202511773936e-05, "loss": 2.1254, "step": 21300 }, { "epoch": 31.61, "learning_rate": 2.934772370486656e-05, "loss": 2.1357, "step": 21400 }, { "epoch": 31.76, "learning_rate": 2.911224489795918e-05, "loss": 2.1338, "step": 21500 }, { "epoch": 31.76, "eval_loss": 0.9932034611701965, "eval_runtime": 114.6961, "eval_samples_per_second": 17.62, "eval_steps_per_second": 2.206, "eval_wer": 2.0024740227610094, "step": 21500 }, { "epoch": 31.91, "learning_rate": 2.8876766091051805e-05, "loss": 2.1053, "step": 21600 }, { "epoch": 32.05, "learning_rate": 2.8641287284144426e-05, "loss": 2.1111, "step": 21700 }, { "epoch": 32.2, "learning_rate": 2.8405808477237045e-05, "loss": 2.1028, "step": 21800 }, { "epoch": 32.35, "learning_rate": 2.817032967032967e-05, "loss": 2.0879, "step": 21900 }, { "epoch": 32.5, "learning_rate": 2.793485086342229e-05, "loss": 2.1076, "step": 22000 }, { "epoch": 32.5, "eval_loss": 0.9902665019035339, "eval_runtime": 120.6987, "eval_samples_per_second": 16.744, "eval_steps_per_second": 2.096, "eval_wer": 2.0504700643245917, "step": 22000 }, { "epoch": 32.64, "learning_rate": 2.769937205651491e-05, "loss": 2.1107, "step": 22100 }, { "epoch": 32.79, "learning_rate": 2.7463893249607535e-05, "loss": 2.0953, "step": 22200 }, { "epoch": 32.94, "learning_rate": 2.7228414442700154e-05, "loss": 2.0619, "step": 22300 }, { "epoch": 33.09, "learning_rate": 2.6992935635792776e-05, "loss": 2.0531, "step": 22400 }, { "epoch": 33.23, "learning_rate": 2.6757456828885397e-05, "loss": 2.0519, "step": 22500 }, { "epoch": 33.23, "eval_loss": 0.9833839535713196, "eval_runtime": 116.5317, "eval_samples_per_second": 17.343, "eval_steps_per_second": 2.171, "eval_wer": 2.07372587827808, "step": 22500 }, { "epoch": 33.38, "learning_rate": 2.652197802197802e-05, "loss": 2.0493, "step": 22600 }, { "epoch": 33.53, "learning_rate": 2.6286499215070644e-05, "loss": 2.0749, "step": 22700 }, { "epoch": 33.68, "learning_rate": 2.6051020408163263e-05, "loss": 2.0838, "step": 22800 }, { "epoch": 33.83, "learning_rate": 2.5815541601255884e-05, "loss": 2.0629, "step": 22900 }, { "epoch": 33.97, "learning_rate": 2.5580062794348506e-05, "loss": 2.0534, "step": 23000 }, { "epoch": 33.97, "eval_loss": 0.9755652546882629, "eval_runtime": 114.923, "eval_samples_per_second": 17.586, "eval_steps_per_second": 2.201, "eval_wer": 2.024740227610094, "step": 23000 }, { "epoch": 34.12, "learning_rate": 2.5344583987441128e-05, "loss": 2.067, "step": 23100 }, { "epoch": 34.27, "learning_rate": 2.5109105180533746e-05, "loss": 2.0252, "step": 23200 }, { "epoch": 34.42, "learning_rate": 2.487362637362637e-05, "loss": 2.0483, "step": 23300 }, { "epoch": 34.56, "learning_rate": 2.4638147566718993e-05, "loss": 2.0464, "step": 23400 }, { "epoch": 34.71, "learning_rate": 2.4402668759811615e-05, "loss": 2.0121, "step": 23500 }, { "epoch": 34.71, "eval_loss": 0.968792736530304, "eval_runtime": 114.3088, "eval_samples_per_second": 17.68, "eval_steps_per_second": 2.213, "eval_wer": 2.1439881246907473, "step": 23500 }, { "epoch": 34.86, "learning_rate": 2.4167189952904237e-05, "loss": 2.036, "step": 23600 }, { "epoch": 35.01, "learning_rate": 2.3931711145996855e-05, "loss": 2.013, "step": 23700 }, { "epoch": 35.16, "learning_rate": 2.369623233908948e-05, "loss": 2.0043, "step": 23800 }, { "epoch": 35.3, "learning_rate": 2.3460753532182102e-05, "loss": 2.037, "step": 23900 }, { "epoch": 35.45, "learning_rate": 2.322527472527472e-05, "loss": 2.0161, "step": 24000 }, { "epoch": 35.45, "eval_loss": 0.9581586718559265, "eval_runtime": 115.925, "eval_samples_per_second": 17.434, "eval_steps_per_second": 2.182, "eval_wer": 2.1232063334982683, "step": 24000 }, { "epoch": 35.6, "learning_rate": 2.2989795918367346e-05, "loss": 2.0256, "step": 24100 }, { "epoch": 35.75, "learning_rate": 2.2754317111459968e-05, "loss": 2.0265, "step": 24200 }, { "epoch": 35.89, "learning_rate": 2.251883830455259e-05, "loss": 2.0298, "step": 24300 }, { "epoch": 36.04, "learning_rate": 2.228335949764521e-05, "loss": 2.0028, "step": 24400 }, { "epoch": 36.19, "learning_rate": 2.204788069073783e-05, "loss": 2.0178, "step": 24500 }, { "epoch": 36.19, "eval_loss": 0.9480372071266174, "eval_runtime": 116.8212, "eval_samples_per_second": 17.3, "eval_steps_per_second": 2.166, "eval_wer": 2.0895596239485403, "step": 24500 }, { "epoch": 36.34, "learning_rate": 2.1812401883830455e-05, "loss": 2.008, "step": 24600 }, { "epoch": 36.48, "learning_rate": 2.1576923076923076e-05, "loss": 2.0132, "step": 24700 }, { "epoch": 36.63, "learning_rate": 2.1341444270015695e-05, "loss": 2.0204, "step": 24800 }, { "epoch": 36.78, "learning_rate": 2.110596546310832e-05, "loss": 1.9806, "step": 24900 }, { "epoch": 36.93, "learning_rate": 2.087048665620094e-05, "loss": 2.0154, "step": 25000 }, { "epoch": 36.93, "eval_loss": 0.9483017325401306, "eval_runtime": 117.4294, "eval_samples_per_second": 17.21, "eval_steps_per_second": 2.154, "eval_wer": 2.078673923800099, "step": 25000 }, { "epoch": 37.08, "learning_rate": 2.063500784929356e-05, "loss": 1.997, "step": 25100 }, { "epoch": 37.22, "learning_rate": 2.0399529042386185e-05, "loss": 1.9712, "step": 25200 }, { "epoch": 37.37, "learning_rate": 2.0164050235478804e-05, "loss": 2.0131, "step": 25300 }, { "epoch": 37.52, "learning_rate": 1.992857142857143e-05, "loss": 1.9605, "step": 25400 }, { "epoch": 37.67, "learning_rate": 1.9695447409733123e-05, "loss": 1.9966, "step": 25500 }, { "epoch": 37.67, "eval_loss": 0.940608024597168, "eval_runtime": 115.2635, "eval_samples_per_second": 17.534, "eval_steps_per_second": 2.195, "eval_wer": 2.0296882731321126, "step": 25500 }, { "epoch": 37.81, "learning_rate": 1.945996860282574e-05, "loss": 1.9879, "step": 25600 }, { "epoch": 37.96, "learning_rate": 1.9224489795918367e-05, "loss": 1.9836, "step": 25700 }, { "epoch": 38.11, "learning_rate": 1.8989010989010988e-05, "loss": 1.9872, "step": 25800 }, { "epoch": 38.26, "learning_rate": 1.8753532182103607e-05, "loss": 1.9684, "step": 25900 }, { "epoch": 38.4, "learning_rate": 1.851805337519623e-05, "loss": 1.9753, "step": 26000 }, { "epoch": 38.4, "eval_loss": 0.9418594837188721, "eval_runtime": 115.7124, "eval_samples_per_second": 17.466, "eval_steps_per_second": 2.186, "eval_wer": 2.0346363186541314, "step": 26000 }, { "epoch": 38.55, "learning_rate": 1.828257456828885e-05, "loss": 1.9926, "step": 26100 }, { "epoch": 38.7, "learning_rate": 1.8047095761381475e-05, "loss": 1.9685, "step": 26200 }, { "epoch": 38.85, "learning_rate": 1.7811616954474097e-05, "loss": 1.9707, "step": 26300 }, { "epoch": 39.0, "learning_rate": 1.7576138147566716e-05, "loss": 1.9477, "step": 26400 }, { "epoch": 39.14, "learning_rate": 1.7340659340659337e-05, "loss": 1.9524, "step": 26500 }, { "epoch": 39.14, "eval_loss": 0.927354097366333, "eval_runtime": 115.8614, "eval_samples_per_second": 17.443, "eval_steps_per_second": 2.184, "eval_wer": 2.0697674418604652, "step": 26500 }, { "epoch": 39.29, "learning_rate": 1.7105180533751963e-05, "loss": 1.9673, "step": 26600 }, { "epoch": 39.44, "learning_rate": 1.6869701726844584e-05, "loss": 1.9802, "step": 26700 }, { "epoch": 39.59, "learning_rate": 1.6634222919937203e-05, "loss": 1.9408, "step": 26800 }, { "epoch": 39.73, "learning_rate": 1.6398744113029824e-05, "loss": 1.9482, "step": 26900 }, { "epoch": 39.88, "learning_rate": 1.6163265306122446e-05, "loss": 1.9427, "step": 27000 }, { "epoch": 39.88, "eval_loss": 0.9232719540596008, "eval_runtime": 116.3191, "eval_samples_per_second": 17.375, "eval_steps_per_second": 2.175, "eval_wer": 2.078673923800099, "step": 27000 }, { "epoch": 40.03, "learning_rate": 1.592778649921507e-05, "loss": 1.9653, "step": 27100 }, { "epoch": 40.18, "learning_rate": 1.569230769230769e-05, "loss": 1.9157, "step": 27200 }, { "epoch": 40.32, "learning_rate": 1.545682888540031e-05, "loss": 1.9493, "step": 27300 }, { "epoch": 40.47, "learning_rate": 1.5221350078492935e-05, "loss": 1.8974, "step": 27400 }, { "epoch": 40.62, "learning_rate": 1.4985871271585557e-05, "loss": 1.9258, "step": 27500 }, { "epoch": 40.62, "eval_loss": 0.9182448983192444, "eval_runtime": 115.4065, "eval_samples_per_second": 17.512, "eval_steps_per_second": 2.192, "eval_wer": 2.052944087085601, "step": 27500 }, { "epoch": 40.77, "learning_rate": 1.4750392464678177e-05, "loss": 1.9354, "step": 27600 }, { "epoch": 40.92, "learning_rate": 1.4514913657770799e-05, "loss": 1.952, "step": 27700 }, { "epoch": 41.06, "learning_rate": 1.4281789638932496e-05, "loss": 1.9231, "step": 27800 }, { "epoch": 41.21, "learning_rate": 1.4046310832025116e-05, "loss": 1.9465, "step": 27900 }, { "epoch": 41.36, "learning_rate": 1.3810832025117738e-05, "loss": 1.9031, "step": 28000 }, { "epoch": 41.36, "eval_loss": 0.9149593114852905, "eval_runtime": 116.2555, "eval_samples_per_second": 17.384, "eval_steps_per_second": 2.176, "eval_wer": 2.078673923800099, "step": 28000 }, { "epoch": 41.51, "learning_rate": 1.357535321821036e-05, "loss": 1.9361, "step": 28100 }, { "epoch": 41.65, "learning_rate": 1.3342229199372054e-05, "loss": 1.916, "step": 28200 }, { "epoch": 41.8, "learning_rate": 1.3106750392464677e-05, "loss": 1.9149, "step": 28300 }, { "epoch": 41.95, "learning_rate": 1.2871271585557299e-05, "loss": 1.9037, "step": 28400 }, { "epoch": 42.1, "learning_rate": 1.263579277864992e-05, "loss": 1.9297, "step": 28500 }, { "epoch": 42.1, "eval_loss": 0.9040070176124573, "eval_runtime": 113.8901, "eval_samples_per_second": 17.745, "eval_steps_per_second": 2.221, "eval_wer": 2.0504700643245917, "step": 28500 }, { "epoch": 42.25, "learning_rate": 1.2400313971742541e-05, "loss": 1.8855, "step": 28600 }, { "epoch": 42.39, "learning_rate": 1.2164835164835163e-05, "loss": 1.9095, "step": 28700 }, { "epoch": 42.54, "learning_rate": 1.1929356357927786e-05, "loss": 1.8913, "step": 28800 }, { "epoch": 42.69, "learning_rate": 1.1693877551020408e-05, "loss": 1.8685, "step": 28900 }, { "epoch": 42.84, "learning_rate": 1.1458398744113028e-05, "loss": 1.9041, "step": 29000 }, { "epoch": 42.84, "eval_loss": 0.9008907675743103, "eval_runtime": 114.9643, "eval_samples_per_second": 17.579, "eval_steps_per_second": 2.201, "eval_wer": 2.05789213260762, "step": 29000 }, { "epoch": 42.98, "learning_rate": 1.122291993720565e-05, "loss": 1.8963, "step": 29100 }, { "epoch": 43.13, "learning_rate": 1.0987441130298273e-05, "loss": 1.9068, "step": 29200 }, { "epoch": 43.28, "learning_rate": 1.0751962323390895e-05, "loss": 1.9003, "step": 29300 }, { "epoch": 43.43, "learning_rate": 1.0516483516483515e-05, "loss": 1.891, "step": 29400 }, { "epoch": 43.57, "learning_rate": 1.0281004709576137e-05, "loss": 1.8929, "step": 29500 }, { "epoch": 43.57, "eval_loss": 0.8968304991722107, "eval_runtime": 116.4378, "eval_samples_per_second": 17.357, "eval_steps_per_second": 2.173, "eval_wer": 2.032657100445324, "step": 29500 }, { "epoch": 43.72, "learning_rate": 1.0045525902668759e-05, "loss": 1.8827, "step": 29600 }, { "epoch": 43.87, "learning_rate": 9.810047095761382e-06, "loss": 1.8862, "step": 29700 }, { "epoch": 44.02, "learning_rate": 9.574568288854002e-06, "loss": 1.8787, "step": 29800 }, { "epoch": 44.17, "learning_rate": 9.339089481946624e-06, "loss": 1.8501, "step": 29900 }, { "epoch": 44.31, "learning_rate": 9.103610675039246e-06, "loss": 1.9077, "step": 30000 }, { "epoch": 44.31, "eval_loss": 0.8953686952590942, "eval_runtime": 115.4838, "eval_samples_per_second": 17.5, "eval_steps_per_second": 2.191, "eval_wer": 2.061850569025235, "step": 30000 }, { "epoch": 44.46, "learning_rate": 8.868131868131868e-06, "loss": 1.8804, "step": 30100 }, { "epoch": 44.61, "learning_rate": 8.63265306122449e-06, "loss": 1.8723, "step": 30200 }, { "epoch": 44.76, "learning_rate": 8.397174254317111e-06, "loss": 1.8577, "step": 30300 }, { "epoch": 44.9, "learning_rate": 8.161695447409733e-06, "loss": 1.8811, "step": 30400 }, { "epoch": 45.05, "learning_rate": 7.928571428571429e-06, "loss": 1.8504, "step": 30500 }, { "epoch": 45.05, "eval_loss": 0.892192542552948, "eval_runtime": 116.2513, "eval_samples_per_second": 17.385, "eval_steps_per_second": 2.176, "eval_wer": 2.07372587827808, "step": 30500 }, { "epoch": 45.2, "learning_rate": 7.693092621664049e-06, "loss": 1.861, "step": 30600 }, { "epoch": 45.35, "learning_rate": 7.457613814756671e-06, "loss": 1.8496, "step": 30700 }, { "epoch": 45.49, "learning_rate": 7.222135007849293e-06, "loss": 1.8612, "step": 30800 }, { "epoch": 45.64, "learning_rate": 6.986656200941915e-06, "loss": 1.865, "step": 30900 }, { "epoch": 45.79, "learning_rate": 6.751177394034536e-06, "loss": 1.8732, "step": 31000 }, { "epoch": 45.79, "eval_loss": 0.8897548317909241, "eval_runtime": 116.5927, "eval_samples_per_second": 17.334, "eval_steps_per_second": 2.17, "eval_wer": 2.0682830282038593, "step": 31000 }, { "epoch": 45.94, "learning_rate": 6.5156985871271585e-06, "loss": 1.8374, "step": 31100 }, { "epoch": 46.09, "learning_rate": 6.280219780219779e-06, "loss": 1.8395, "step": 31200 }, { "epoch": 46.23, "learning_rate": 6.044740973312402e-06, "loss": 1.8377, "step": 31300 }, { "epoch": 46.38, "learning_rate": 5.809262166405023e-06, "loss": 1.87, "step": 31400 }, { "epoch": 46.53, "learning_rate": 5.573783359497644e-06, "loss": 1.877, "step": 31500 }, { "epoch": 46.53, "eval_loss": 0.8848925828933716, "eval_runtime": 116.1465, "eval_samples_per_second": 17.4, "eval_steps_per_second": 2.178, "eval_wer": 2.0588817417120238, "step": 31500 }, { "epoch": 46.68, "learning_rate": 5.3383045525902665e-06, "loss": 1.8256, "step": 31600 }, { "epoch": 46.82, "learning_rate": 5.1028257456828875e-06, "loss": 1.8317, "step": 31700 }, { "epoch": 46.97, "learning_rate": 4.86734693877551e-06, "loss": 1.8579, "step": 31800 }, { "epoch": 47.12, "learning_rate": 4.631868131868132e-06, "loss": 1.839, "step": 31900 }, { "epoch": 47.27, "learning_rate": 4.396389324960754e-06, "loss": 1.8587, "step": 32000 }, { "epoch": 47.27, "eval_loss": 0.8843359351158142, "eval_runtime": 116.5866, "eval_samples_per_second": 17.335, "eval_steps_per_second": 2.17, "eval_wer": 2.045027214250371, "step": 32000 }, { "epoch": 47.41, "learning_rate": 4.160910518053375e-06, "loss": 1.8419, "step": 32100 }, { "epoch": 47.56, "learning_rate": 3.925431711145996e-06, "loss": 1.8639, "step": 32200 }, { "epoch": 47.71, "learning_rate": 3.6899529042386186e-06, "loss": 1.8395, "step": 32300 }, { "epoch": 47.86, "learning_rate": 3.45447409733124e-06, "loss": 1.8369, "step": 32400 }, { "epoch": 48.01, "learning_rate": 3.2189952904238617e-06, "loss": 1.8236, "step": 32500 }, { "epoch": 48.01, "eval_loss": 0.8810222148895264, "eval_runtime": 115.817, "eval_samples_per_second": 17.45, "eval_steps_per_second": 2.184, "eval_wer": 2.0554181098466104, "step": 32500 }, { "epoch": 48.15, "learning_rate": 2.9835164835164835e-06, "loss": 1.8468, "step": 32600 }, { "epoch": 48.3, "learning_rate": 2.7503924646781788e-06, "loss": 1.8326, "step": 32700 }, { "epoch": 48.45, "learning_rate": 2.5149136577708006e-06, "loss": 1.8279, "step": 32800 }, { "epoch": 48.6, "learning_rate": 2.2794348508634223e-06, "loss": 1.8324, "step": 32900 }, { "epoch": 48.74, "learning_rate": 2.043956043956044e-06, "loss": 1.8392, "step": 33000 }, { "epoch": 48.74, "eval_loss": 0.8820456266403198, "eval_runtime": 115.6891, "eval_samples_per_second": 17.469, "eval_steps_per_second": 2.187, "eval_wer": 2.0573973280554183, "step": 33000 }, { "epoch": 48.89, "learning_rate": 1.8084772370486653e-06, "loss": 1.8363, "step": 33100 }, { "epoch": 49.04, "learning_rate": 1.572998430141287e-06, "loss": 1.7996, "step": 33200 }, { "epoch": 49.19, "learning_rate": 1.3375196232339088e-06, "loss": 1.8113, "step": 33300 }, { "epoch": 49.34, "learning_rate": 1.1020408163265304e-06, "loss": 1.8428, "step": 33400 }, { "epoch": 49.48, "learning_rate": 8.665620094191522e-07, "loss": 1.8428, "step": 33500 }, { "epoch": 49.48, "eval_loss": 0.8815611600875854, "eval_runtime": 117.0058, "eval_samples_per_second": 17.273, "eval_steps_per_second": 2.162, "eval_wer": 2.066798614547254, "step": 33500 }, { "epoch": 49.63, "learning_rate": 6.310832025117738e-07, "loss": 1.8284, "step": 33600 }, { "epoch": 49.78, "learning_rate": 3.9560439560439557e-07, "loss": 1.8226, "step": 33700 }, { "epoch": 49.93, "learning_rate": 1.6012558869701725e-07, "loss": 1.8287, "step": 33800 }, { "epoch": 50.0, "step": 33850, "total_flos": 1.54029172542989e+20, "train_loss": 4.34445102640938, "train_runtime": 69888.62, "train_samples_per_second": 15.505, "train_steps_per_second": 0.484 } ], "max_steps": 33850, "num_train_epochs": 50, "total_flos": 1.54029172542989e+20, "trial_name": null, "trial_params": null }