{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.862593783494106, "global_step": 11500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 3.7125e-06, "loss": 16.3142, "step": 100 }, { "epoch": 0.17, "learning_rate": 7.4625e-06, "loss": 9.7017, "step": 200 }, { "epoch": 0.26, "learning_rate": 1.1212499999999998e-05, "loss": 6.7433, "step": 300 }, { "epoch": 0.34, "learning_rate": 1.49625e-05, "loss": 5.7032, "step": 400 }, { "epoch": 0.43, "learning_rate": 1.8712499999999997e-05, "loss": 4.7972, "step": 500 }, { "epoch": 0.43, "eval_loss": 5.140112400054932, "eval_runtime": 323.0627, "eval_samples_per_second": 31.833, "eval_steps_per_second": 3.981, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.51, "learning_rate": 2.2462499999999997e-05, "loss": 4.1097, "step": 600 }, { "epoch": 0.6, "learning_rate": 2.6212499999999997e-05, "loss": 3.6756, "step": 700 }, { "epoch": 0.69, "learning_rate": 2.99625e-05, "loss": 3.5057, "step": 800 }, { "epoch": 0.77, "learning_rate": 3.37125e-05, "loss": 3.4116, "step": 900 }, { "epoch": 0.86, "learning_rate": 3.7462499999999996e-05, "loss": 3.3241, "step": 1000 }, { "epoch": 0.86, "eval_loss": 3.3219833374023438, "eval_runtime": 316.4143, "eval_samples_per_second": 32.502, "eval_steps_per_second": 4.064, "eval_wer": 1.0, "step": 1000 }, { "epoch": 0.94, "learning_rate": 4.12125e-05, "loss": 3.2968, "step": 1100 }, { "epoch": 1.03, "learning_rate": 4.4962499999999995e-05, "loss": 3.2757, "step": 1200 }, { "epoch": 1.11, "learning_rate": 4.871249999999999e-05, "loss": 3.2343, "step": 1300 }, { "epoch": 1.2, "learning_rate": 5.2462499999999994e-05, "loss": 3.1872, "step": 1400 }, { "epoch": 1.29, "learning_rate": 5.62125e-05, "loss": 3.1432, "step": 1500 }, { "epoch": 1.29, "eval_loss": 3.080551862716675, "eval_runtime": 318.2154, "eval_samples_per_second": 32.318, "eval_steps_per_second": 4.041, "eval_wer": 0.9998891086181085, "step": 1500 }, { "epoch": 1.37, "learning_rate": 5.9962499999999994e-05, "loss": 3.0996, "step": 1600 }, { "epoch": 1.46, "learning_rate": 6.37125e-05, "loss": 3.0582, "step": 1700 }, { "epoch": 1.54, "learning_rate": 6.746249999999999e-05, "loss": 3.0313, "step": 1800 }, { "epoch": 1.63, "learning_rate": 7.121249999999999e-05, "loss": 2.9824, "step": 1900 }, { "epoch": 1.72, "learning_rate": 7.49625e-05, "loss": 2.9297, "step": 2000 }, { "epoch": 1.72, "eval_loss": 2.567805528640747, "eval_runtime": 317.0688, "eval_samples_per_second": 32.435, "eval_steps_per_second": 4.056, "eval_wer": 1.005729388064391, "step": 2000 }, { "epoch": 1.8, "learning_rate": 7.423136645962732e-05, "loss": 2.8167, "step": 2100 }, { "epoch": 1.89, "learning_rate": 7.345496894409937e-05, "loss": 2.6315, "step": 2200 }, { "epoch": 1.97, "learning_rate": 7.26863354037267e-05, "loss": 2.475, "step": 2300 }, { "epoch": 2.06, "learning_rate": 7.190993788819875e-05, "loss": 2.3571, "step": 2400 }, { "epoch": 2.14, "learning_rate": 7.11335403726708e-05, "loss": 2.2593, "step": 2500 }, { "epoch": 2.14, "eval_loss": 1.1067590713500977, "eval_runtime": 319.3729, "eval_samples_per_second": 32.201, "eval_steps_per_second": 4.027, "eval_wer": 0.821834513094424, "step": 2500 }, { "epoch": 2.23, "learning_rate": 7.035714285714285e-05, "loss": 2.1855, "step": 2600 }, { "epoch": 2.32, "learning_rate": 6.95807453416149e-05, "loss": 2.1429, "step": 2700 }, { "epoch": 2.4, "learning_rate": 6.880434782608696e-05, "loss": 2.1143, "step": 2800 }, { "epoch": 2.49, "learning_rate": 6.802795031055901e-05, "loss": 2.081, "step": 2900 }, { "epoch": 2.57, "learning_rate": 6.725155279503105e-05, "loss": 2.0504, "step": 3000 }, { "epoch": 2.57, "eval_loss": 0.7877965569496155, "eval_runtime": 316.6537, "eval_samples_per_second": 32.477, "eval_steps_per_second": 4.061, "eval_wer": 0.7113866967305524, "step": 3000 }, { "epoch": 2.66, "learning_rate": 6.64751552795031e-05, "loss": 2.014, "step": 3100 }, { "epoch": 2.74, "learning_rate": 6.569875776397515e-05, "loss": 1.9885, "step": 3200 }, { "epoch": 2.83, "learning_rate": 6.49223602484472e-05, "loss": 1.9532, "step": 3300 }, { "epoch": 2.92, "learning_rate": 6.414596273291925e-05, "loss": 1.936, "step": 3400 }, { "epoch": 3.0, "learning_rate": 6.33695652173913e-05, "loss": 1.937, "step": 3500 }, { "epoch": 3.0, "eval_loss": 0.6955012679100037, "eval_runtime": 319.8543, "eval_samples_per_second": 32.152, "eval_steps_per_second": 4.021, "eval_wer": 0.6449812408745633, "step": 3500 }, { "epoch": 3.09, "learning_rate": 6.259316770186334e-05, "loss": 1.9074, "step": 3600 }, { "epoch": 3.17, "learning_rate": 6.18167701863354e-05, "loss": 1.8975, "step": 3700 }, { "epoch": 3.26, "learning_rate": 6.104037267080745e-05, "loss": 1.8628, "step": 3800 }, { "epoch": 3.34, "learning_rate": 6.02639751552795e-05, "loss": 1.8605, "step": 3900 }, { "epoch": 3.43, "learning_rate": 5.948757763975155e-05, "loss": 1.8491, "step": 4000 }, { "epoch": 3.43, "eval_loss": 0.645221471786499, "eval_runtime": 318.4935, "eval_samples_per_second": 32.29, "eval_steps_per_second": 4.038, "eval_wer": 0.6303620603618756, "step": 4000 }, { "epoch": 3.52, "learning_rate": 5.87111801242236e-05, "loss": 1.85, "step": 4100 }, { "epoch": 3.6, "learning_rate": 5.7934782608695654e-05, "loss": 1.8298, "step": 4200 }, { "epoch": 3.69, "learning_rate": 5.715838509316769e-05, "loss": 1.8108, "step": 4300 }, { "epoch": 3.77, "learning_rate": 5.6381987577639744e-05, "loss": 1.8136, "step": 4400 }, { "epoch": 3.86, "learning_rate": 5.5605590062111795e-05, "loss": 1.803, "step": 4500 }, { "epoch": 3.86, "eval_loss": 0.5961059927940369, "eval_runtime": 322.5472, "eval_samples_per_second": 31.884, "eval_steps_per_second": 3.987, "eval_wer": 0.6041547304415326, "step": 4500 }, { "epoch": 3.94, "learning_rate": 5.482919254658385e-05, "loss": 1.7829, "step": 4600 }, { "epoch": 4.03, "learning_rate": 5.40527950310559e-05, "loss": 1.7755, "step": 4700 }, { "epoch": 4.12, "learning_rate": 5.327639751552795e-05, "loss": 1.7805, "step": 4800 }, { "epoch": 4.2, "learning_rate": 5.2499999999999995e-05, "loss": 1.7647, "step": 4900 }, { "epoch": 4.29, "learning_rate": 5.173136645962733e-05, "loss": 1.7545, "step": 5000 }, { "epoch": 4.29, "eval_loss": 0.5550380945205688, "eval_runtime": 317.6329, "eval_samples_per_second": 32.377, "eval_steps_per_second": 4.049, "eval_wer": 0.5747500323433197, "step": 5000 }, { "epoch": 4.37, "learning_rate": 5.095496894409938e-05, "loss": 1.735, "step": 5100 }, { "epoch": 4.46, "learning_rate": 5.017857142857142e-05, "loss": 1.7365, "step": 5200 }, { "epoch": 4.55, "learning_rate": 4.940217391304347e-05, "loss": 1.734, "step": 5300 }, { "epoch": 4.63, "learning_rate": 4.862577639751552e-05, "loss": 1.7254, "step": 5400 }, { "epoch": 4.72, "learning_rate": 4.7849378881987574e-05, "loss": 1.7045, "step": 5500 }, { "epoch": 4.72, "eval_loss": 0.5373523831367493, "eval_runtime": 318.8226, "eval_samples_per_second": 32.256, "eval_steps_per_second": 4.034, "eval_wer": 0.5743064668157539, "step": 5500 }, { "epoch": 4.8, "learning_rate": 4.7072981366459626e-05, "loss": 1.7031, "step": 5600 }, { "epoch": 4.89, "learning_rate": 4.629658385093168e-05, "loss": 1.7135, "step": 5700 }, { "epoch": 4.97, "learning_rate": 4.5520186335403715e-05, "loss": 1.7053, "step": 5800 }, { "epoch": 5.06, "learning_rate": 4.474378881987577e-05, "loss": 1.7078, "step": 5900 }, { "epoch": 5.15, "learning_rate": 4.396739130434782e-05, "loss": 1.6733, "step": 6000 }, { "epoch": 5.15, "eval_loss": 0.5336768627166748, "eval_runtime": 321.7447, "eval_samples_per_second": 31.963, "eval_steps_per_second": 3.997, "eval_wer": 0.5403737039569741, "step": 6000 }, { "epoch": 5.23, "learning_rate": 4.319099378881987e-05, "loss": 1.6737, "step": 6100 }, { "epoch": 5.32, "learning_rate": 4.241459627329192e-05, "loss": 1.6939, "step": 6200 }, { "epoch": 5.4, "learning_rate": 4.1638198757763974e-05, "loss": 1.674, "step": 6300 }, { "epoch": 5.49, "learning_rate": 4.0861801242236026e-05, "loss": 1.6685, "step": 6400 }, { "epoch": 5.57, "learning_rate": 4.008540372670807e-05, "loss": 1.6761, "step": 6500 }, { "epoch": 5.57, "eval_loss": 0.5054484009742737, "eval_runtime": 316.5106, "eval_samples_per_second": 32.492, "eval_steps_per_second": 4.063, "eval_wer": 0.5265862088084721, "step": 6500 }, { "epoch": 5.66, "learning_rate": 3.930900621118012e-05, "loss": 1.6729, "step": 6600 }, { "epoch": 5.75, "learning_rate": 3.853260869565217e-05, "loss": 1.6521, "step": 6700 }, { "epoch": 5.83, "learning_rate": 3.775621118012422e-05, "loss": 1.6876, "step": 6800 }, { "epoch": 5.92, "learning_rate": 3.697981366459627e-05, "loss": 1.6499, "step": 6900 }, { "epoch": 6.0, "learning_rate": 3.620341614906832e-05, "loss": 1.655, "step": 7000 }, { "epoch": 6.0, "eval_loss": 0.492550253868103, "eval_runtime": 321.8948, "eval_samples_per_second": 31.948, "eval_steps_per_second": 3.995, "eval_wer": 0.5243314173766795, "step": 7000 }, { "epoch": 6.09, "learning_rate": 3.543478260869565e-05, "loss": 1.6501, "step": 7100 }, { "epoch": 6.17, "learning_rate": 3.4658385093167694e-05, "loss": 1.653, "step": 7200 }, { "epoch": 6.26, "learning_rate": 3.3881987577639746e-05, "loss": 1.6381, "step": 7300 }, { "epoch": 6.35, "learning_rate": 3.31055900621118e-05, "loss": 1.6231, "step": 7400 }, { "epoch": 6.43, "learning_rate": 3.232919254658385e-05, "loss": 1.6252, "step": 7500 }, { "epoch": 6.43, "eval_loss": 0.49458181858062744, "eval_runtime": 317.6079, "eval_samples_per_second": 32.38, "eval_steps_per_second": 4.049, "eval_wer": 0.5182508732696324, "step": 7500 }, { "epoch": 6.52, "learning_rate": 3.1552795031055894e-05, "loss": 1.6409, "step": 7600 }, { "epoch": 6.6, "learning_rate": 3.0776397515527946e-05, "loss": 1.6256, "step": 7700 }, { "epoch": 6.69, "learning_rate": 2.9999999999999997e-05, "loss": 1.6113, "step": 7800 }, { "epoch": 6.78, "learning_rate": 2.9223602484472046e-05, "loss": 1.6096, "step": 7900 }, { "epoch": 6.86, "learning_rate": 2.8447204968944097e-05, "loss": 1.6209, "step": 8000 }, { "epoch": 6.86, "eval_loss": 0.491542249917984, "eval_runtime": 319.2369, "eval_samples_per_second": 32.214, "eval_steps_per_second": 4.028, "eval_wer": 0.5193597870885468, "step": 8000 }, { "epoch": 6.95, "learning_rate": 2.767080745341615e-05, "loss": 1.5917, "step": 8100 }, { "epoch": 7.03, "learning_rate": 2.6894409937888194e-05, "loss": 1.6049, "step": 8200 }, { "epoch": 7.12, "learning_rate": 2.6118012422360246e-05, "loss": 1.5891, "step": 8300 }, { "epoch": 7.2, "learning_rate": 2.5341614906832297e-05, "loss": 1.5915, "step": 8400 }, { "epoch": 7.29, "learning_rate": 2.4565217391304346e-05, "loss": 1.5772, "step": 8500 }, { "epoch": 7.29, "eval_loss": 0.47245877981185913, "eval_runtime": 314.2017, "eval_samples_per_second": 32.731, "eval_steps_per_second": 4.093, "eval_wer": 0.5103775851553404, "step": 8500 }, { "epoch": 7.38, "learning_rate": 2.3788819875776394e-05, "loss": 1.5932, "step": 8600 }, { "epoch": 7.46, "learning_rate": 2.3012422360248446e-05, "loss": 1.574, "step": 8700 }, { "epoch": 7.55, "learning_rate": 2.2236024844720494e-05, "loss": 1.5818, "step": 8800 }, { "epoch": 7.63, "learning_rate": 2.1459627329192546e-05, "loss": 1.5852, "step": 8900 }, { "epoch": 7.72, "learning_rate": 2.0683229813664594e-05, "loss": 1.5602, "step": 9000 }, { "epoch": 7.72, "eval_loss": 0.47263726592063904, "eval_runtime": 323.393, "eval_samples_per_second": 31.8, "eval_steps_per_second": 3.977, "eval_wer": 0.509656791173046, "step": 9000 }, { "epoch": 7.8, "learning_rate": 1.9906832298136646e-05, "loss": 1.5737, "step": 9100 }, { "epoch": 7.89, "learning_rate": 1.9130434782608694e-05, "loss": 1.5894, "step": 9200 }, { "epoch": 7.98, "learning_rate": 1.8361801242236024e-05, "loss": 1.5694, "step": 9300 }, { "epoch": 8.06, "learning_rate": 1.7585403726708073e-05, "loss": 1.537, "step": 9400 }, { "epoch": 8.15, "learning_rate": 1.680900621118012e-05, "loss": 1.5783, "step": 9500 }, { "epoch": 8.15, "eval_loss": 0.4666772186756134, "eval_runtime": 318.3833, "eval_samples_per_second": 32.301, "eval_steps_per_second": 4.039, "eval_wer": 0.49557358567283344, "step": 9500 }, { "epoch": 8.23, "learning_rate": 1.604037267080745e-05, "loss": 1.5432, "step": 9600 }, { "epoch": 8.32, "learning_rate": 1.5263975155279503e-05, "loss": 1.5439, "step": 9700 }, { "epoch": 8.4, "learning_rate": 1.4487577639751551e-05, "loss": 1.5589, "step": 9800 }, { "epoch": 8.49, "learning_rate": 1.3711180124223601e-05, "loss": 1.5573, "step": 9900 }, { "epoch": 8.58, "learning_rate": 1.2934782608695651e-05, "loss": 1.5442, "step": 10000 }, { "epoch": 8.58, "eval_loss": 0.46852901577949524, "eval_runtime": 317.9433, "eval_samples_per_second": 32.345, "eval_steps_per_second": 4.045, "eval_wer": 0.49374387787162477, "step": 10000 }, { "epoch": 8.66, "learning_rate": 1.2158385093167701e-05, "loss": 1.5679, "step": 10100 }, { "epoch": 8.75, "learning_rate": 1.1381987577639751e-05, "loss": 1.5604, "step": 10200 }, { "epoch": 8.83, "learning_rate": 1.06055900621118e-05, "loss": 1.5458, "step": 10300 }, { "epoch": 8.92, "learning_rate": 9.829192546583851e-06, "loss": 1.5501, "step": 10400 }, { "epoch": 9.01, "learning_rate": 9.0527950310559e-06, "loss": 1.5597, "step": 10500 }, { "epoch": 9.01, "eval_loss": 0.4707895815372467, "eval_runtime": 323.6683, "eval_samples_per_second": 31.773, "eval_steps_per_second": 3.973, "eval_wer": 0.495665995157743, "step": 10500 }, { "epoch": 9.09, "learning_rate": 8.27639751552795e-06, "loss": 1.5189, "step": 10600 }, { "epoch": 9.18, "learning_rate": 7.499999999999999e-06, "loss": 1.5272, "step": 10700 }, { "epoch": 9.26, "learning_rate": 6.7236024844720485e-06, "loss": 1.5319, "step": 10800 }, { "epoch": 9.35, "learning_rate": 5.9472049689440985e-06, "loss": 1.5175, "step": 10900 }, { "epoch": 9.43, "learning_rate": 5.1708074534161485e-06, "loss": 1.5406, "step": 11000 }, { "epoch": 9.43, "eval_loss": 0.45390617847442627, "eval_runtime": 320.1014, "eval_samples_per_second": 32.127, "eval_steps_per_second": 4.017, "eval_wer": 0.48104681464505517, "step": 11000 }, { "epoch": 9.52, "learning_rate": 4.3944099378881985e-06, "loss": 1.5326, "step": 11100 }, { "epoch": 9.61, "learning_rate": 3.6180124223602485e-06, "loss": 1.5142, "step": 11200 }, { "epoch": 9.69, "learning_rate": 2.8416149068322976e-06, "loss": 1.5195, "step": 11300 }, { "epoch": 9.78, "learning_rate": 2.0652173913043476e-06, "loss": 1.5147, "step": 11400 }, { "epoch": 9.86, "learning_rate": 1.2888198757763974e-06, "loss": 1.5274, "step": 11500 }, { "epoch": 9.86, "eval_loss": 0.45016008615493774, "eval_runtime": 317.8491, "eval_samples_per_second": 32.355, "eval_steps_per_second": 4.046, "eval_wer": 0.4782560482007873, "step": 11500 }, { "epoch": 9.86, "step": 11500, "total_flos": 3.154318018894781e+19, "train_loss": 0.0, "train_runtime": 3.4176, "train_samples_per_second": 54596.703, "train_steps_per_second": 1705.873 } ], "max_steps": 5830, "num_train_epochs": 5, "total_flos": 3.154318018894781e+19, "trial_name": null, "trial_params": null }