{ |
|
"best_metric": 0.441910982131958, |
|
"best_model_checkpoint": "./Hubert-common_voice-ja-demo-roma-debug-40epochs-cosine/checkpoint-9700", |
|
"epoch": 40.0, |
|
"eval_steps": 100, |
|
"global_step": 15040, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26595744680851063, |
|
"eval_cer": 3.477914534858724, |
|
"eval_loss": 16.817241668701172, |
|
"eval_runtime": 221.1037, |
|
"eval_samples_per_second": 22.428, |
|
"eval_steps_per_second": 2.804, |
|
"eval_wer": 2.9493849566444847, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"eval_cer": 2.9561261222527526, |
|
"eval_loss": 16.550180435180664, |
|
"eval_runtime": 213.5908, |
|
"eval_samples_per_second": 23.217, |
|
"eval_steps_per_second": 2.903, |
|
"eval_wer": 2.7606372252470255, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7978723404255319, |
|
"eval_cer": 1.6838657478883865, |
|
"eval_loss": 15.8340425491333, |
|
"eval_runtime": 213.9597, |
|
"eval_samples_per_second": 23.177, |
|
"eval_steps_per_second": 2.898, |
|
"eval_wer": 1.9100625126033475, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"eval_cer": 0.9275934445195761, |
|
"eval_loss": 13.291926383972168, |
|
"eval_runtime": 211.7589, |
|
"eval_samples_per_second": 23.418, |
|
"eval_steps_per_second": 2.928, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"grad_norm": 41.734230041503906, |
|
"learning_rate": 1.1904e-06, |
|
"loss": 12.6588, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 7.879184246063232, |
|
"eval_runtime": 213.1723, |
|
"eval_samples_per_second": 23.263, |
|
"eval_steps_per_second": 2.908, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 6.154232501983643, |
|
"eval_runtime": 212.0332, |
|
"eval_samples_per_second": 23.388, |
|
"eval_steps_per_second": 2.924, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8617021276595744, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 5.775670051574707, |
|
"eval_runtime": 210.8145, |
|
"eval_samples_per_second": 23.523, |
|
"eval_steps_per_second": 2.941, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 5.6188063621521, |
|
"eval_runtime": 211.2888, |
|
"eval_samples_per_second": 23.47, |
|
"eval_steps_per_second": 2.934, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.393617021276596, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 5.475332260131836, |
|
"eval_runtime": 208.2147, |
|
"eval_samples_per_second": 23.817, |
|
"eval_steps_per_second": 2.978, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 41.6483154296875, |
|
"learning_rate": 2.3904e-06, |
|
"loss": 5.2353, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 5.3238911628723145, |
|
"eval_runtime": 208.8415, |
|
"eval_samples_per_second": 23.745, |
|
"eval_steps_per_second": 2.969, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.925531914893617, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 5.167562961578369, |
|
"eval_runtime": 207.9905, |
|
"eval_samples_per_second": 23.842, |
|
"eval_steps_per_second": 2.981, |
|
"eval_wer": 1.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 5.008359432220459, |
|
"eval_runtime": 208.6683, |
|
"eval_samples_per_second": 23.765, |
|
"eval_steps_per_second": 2.971, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.4574468085106385, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 4.840206146240234, |
|
"eval_runtime": 207.1918, |
|
"eval_samples_per_second": 23.934, |
|
"eval_steps_per_second": 2.992, |
|
"eval_wer": 1.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.723404255319149, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 4.670183181762695, |
|
"eval_runtime": 206.5028, |
|
"eval_samples_per_second": 24.014, |
|
"eval_steps_per_second": 3.002, |
|
"eval_wer": 1.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"grad_norm": 25.022253036499023, |
|
"learning_rate": 3.5904e-06, |
|
"loss": 4.4502, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 4.495745658874512, |
|
"eval_runtime": 206.573, |
|
"eval_samples_per_second": 24.006, |
|
"eval_steps_per_second": 3.001, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 4.321905136108398, |
|
"eval_runtime": 208.0558, |
|
"eval_samples_per_second": 23.835, |
|
"eval_steps_per_second": 2.98, |
|
"eval_wer": 1.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.5212765957446805, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 4.150215148925781, |
|
"eval_runtime": 213.3593, |
|
"eval_samples_per_second": 23.242, |
|
"eval_steps_per_second": 2.906, |
|
"eval_wer": 1.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.787234042553192, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.9856085777282715, |
|
"eval_runtime": 207.2181, |
|
"eval_samples_per_second": 23.931, |
|
"eval_steps_per_second": 2.992, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.053191489361702, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.834258794784546, |
|
"eval_runtime": 206.1057, |
|
"eval_samples_per_second": 24.06, |
|
"eval_steps_per_second": 3.008, |
|
"eval_wer": 1.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 12.537736892700195, |
|
"learning_rate": 4.7904e-06, |
|
"loss": 3.7863, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.690730333328247, |
|
"eval_runtime": 207.5053, |
|
"eval_samples_per_second": 23.898, |
|
"eval_steps_per_second": 2.988, |
|
"eval_wer": 1.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.585106382978723, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.5544023513793945, |
|
"eval_runtime": 207.2908, |
|
"eval_samples_per_second": 23.923, |
|
"eval_steps_per_second": 2.991, |
|
"eval_wer": 1.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.851063829787234, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.4332330226898193, |
|
"eval_runtime": 208.1267, |
|
"eval_samples_per_second": 23.827, |
|
"eval_steps_per_second": 2.979, |
|
"eval_wer": 1.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.117021276595745, |
|
"eval_cer": 0.9275817187616037, |
|
"eval_loss": 3.3063294887542725, |
|
"eval_runtime": 207.0236, |
|
"eval_samples_per_second": 23.954, |
|
"eval_steps_per_second": 2.995, |
|
"eval_wer": 1.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.2074904441833496, |
|
"eval_runtime": 207.8656, |
|
"eval_samples_per_second": 23.857, |
|
"eval_steps_per_second": 2.983, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"grad_norm": 6.3363356590271, |
|
"learning_rate": 5.9904e-06, |
|
"loss": 3.2473, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.1271653175354004, |
|
"eval_runtime": 209.3454, |
|
"eval_samples_per_second": 23.688, |
|
"eval_steps_per_second": 2.962, |
|
"eval_wer": 1.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.914893617021277, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.0657362937927246, |
|
"eval_runtime": 209.9086, |
|
"eval_samples_per_second": 23.625, |
|
"eval_steps_per_second": 2.954, |
|
"eval_wer": 1.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.180851063829787, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 3.0163941383361816, |
|
"eval_runtime": 209.8668, |
|
"eval_samples_per_second": 23.629, |
|
"eval_steps_per_second": 2.954, |
|
"eval_wer": 1.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.9747605323791504, |
|
"eval_runtime": 209.2203, |
|
"eval_samples_per_second": 23.702, |
|
"eval_steps_per_second": 2.963, |
|
"eval_wer": 1.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.712765957446808, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.9446794986724854, |
|
"eval_runtime": 208.1917, |
|
"eval_samples_per_second": 23.819, |
|
"eval_steps_per_second": 2.978, |
|
"eval_wer": 1.0, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"grad_norm": 1.2298308610916138, |
|
"learning_rate": 7.190400000000001e-06, |
|
"loss": 2.9649, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.918757915496826, |
|
"eval_runtime": 208.2245, |
|
"eval_samples_per_second": 23.816, |
|
"eval_steps_per_second": 2.978, |
|
"eval_wer": 1.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.24468085106383, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.9005508422851562, |
|
"eval_runtime": 205.5088, |
|
"eval_samples_per_second": 24.13, |
|
"eval_steps_per_second": 3.017, |
|
"eval_wer": 1.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.8877005577087402, |
|
"eval_runtime": 205.8033, |
|
"eval_samples_per_second": 24.096, |
|
"eval_steps_per_second": 3.013, |
|
"eval_wer": 1.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.77659574468085, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.8712358474731445, |
|
"eval_runtime": 206.9211, |
|
"eval_samples_per_second": 23.966, |
|
"eval_steps_per_second": 2.996, |
|
"eval_wer": 1.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.042553191489361, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.8528554439544678, |
|
"eval_runtime": 205.6952, |
|
"eval_samples_per_second": 24.108, |
|
"eval_steps_per_second": 3.014, |
|
"eval_wer": 1.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"grad_norm": 1.270645022392273, |
|
"learning_rate": 8.3904e-06, |
|
"loss": 2.8673, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.8438620567321777, |
|
"eval_runtime": 206.4519, |
|
"eval_samples_per_second": 24.02, |
|
"eval_steps_per_second": 3.003, |
|
"eval_wer": 1.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.574468085106384, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.8312649726867676, |
|
"eval_runtime": 206.5101, |
|
"eval_samples_per_second": 24.013, |
|
"eval_steps_per_second": 3.002, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.840425531914894, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.8182146549224854, |
|
"eval_runtime": 206.3851, |
|
"eval_samples_per_second": 24.028, |
|
"eval_steps_per_second": 3.004, |
|
"eval_wer": 1.0, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 10.106382978723405, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.7310709953308105, |
|
"eval_runtime": 207.1152, |
|
"eval_samples_per_second": 23.943, |
|
"eval_steps_per_second": 2.994, |
|
"eval_wer": 1.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 10.372340425531915, |
|
"eval_cer": 0.9276012616915579, |
|
"eval_loss": 2.4996562004089355, |
|
"eval_runtime": 207.3947, |
|
"eval_samples_per_second": 23.911, |
|
"eval_steps_per_second": 2.989, |
|
"eval_wer": 1.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"grad_norm": 2.881579875946045, |
|
"learning_rate": 9.5904e-06, |
|
"loss": 2.6801, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"eval_cer": 0.8951130949356452, |
|
"eval_loss": 2.2397685050964355, |
|
"eval_runtime": 207.9025, |
|
"eval_samples_per_second": 23.853, |
|
"eval_steps_per_second": 2.982, |
|
"eval_wer": 1.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.904255319148936, |
|
"eval_cer": 0.6153560526408361, |
|
"eval_loss": 1.9111369848251343, |
|
"eval_runtime": 209.5566, |
|
"eval_samples_per_second": 23.664, |
|
"eval_steps_per_second": 2.959, |
|
"eval_wer": 1.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 11.170212765957446, |
|
"eval_cer": 0.4341266460032754, |
|
"eval_loss": 1.5446799993515015, |
|
"eval_runtime": 210.5656, |
|
"eval_samples_per_second": 23.551, |
|
"eval_steps_per_second": 2.944, |
|
"eval_wer": 1.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 11.436170212765958, |
|
"eval_cer": 0.3958537719809105, |
|
"eval_loss": 1.3182002305984497, |
|
"eval_runtime": 210.4243, |
|
"eval_samples_per_second": 23.567, |
|
"eval_steps_per_second": 2.946, |
|
"eval_wer": 1.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.702127659574469, |
|
"eval_cer": 0.3706043064800447, |
|
"eval_loss": 1.1702227592468262, |
|
"eval_runtime": 210.384, |
|
"eval_samples_per_second": 23.571, |
|
"eval_steps_per_second": 2.947, |
|
"eval_wer": 0.9995966928816293, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"grad_norm": 3.1133100986480713, |
|
"learning_rate": 1.0790400000000001e-05, |
|
"loss": 1.5214, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"eval_cer": 0.32139911744128324, |
|
"eval_loss": 1.0558491945266724, |
|
"eval_runtime": 211.4587, |
|
"eval_samples_per_second": 23.451, |
|
"eval_steps_per_second": 2.932, |
|
"eval_wer": 0.9991933857632587, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.23404255319149, |
|
"eval_cer": 0.3023643036658628, |
|
"eval_loss": 0.9717462658882141, |
|
"eval_runtime": 211.083, |
|
"eval_samples_per_second": 23.493, |
|
"eval_steps_per_second": 2.937, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_cer": 0.2873631506329955, |
|
"eval_loss": 0.8958828449249268, |
|
"eval_runtime": 215.6014, |
|
"eval_samples_per_second": 23.001, |
|
"eval_steps_per_second": 2.876, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 12.76595744680851, |
|
"eval_cer": 0.2747306007105809, |
|
"eval_loss": 0.8398593068122864, |
|
"eval_runtime": 211.3172, |
|
"eval_samples_per_second": 23.467, |
|
"eval_steps_per_second": 2.934, |
|
"eval_wer": 0.9977818108489614, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.03191489361702, |
|
"eval_cer": 0.2656900413137539, |
|
"eval_loss": 0.7890844345092773, |
|
"eval_runtime": 211.3236, |
|
"eval_samples_per_second": 23.466, |
|
"eval_steps_per_second": 2.934, |
|
"eval_wer": 0.9973785037305909, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"grad_norm": 2.623598098754883, |
|
"learning_rate": 1.19904e-05, |
|
"loss": 0.8719, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"eval_cer": 0.25801748701372307, |
|
"eval_loss": 0.7483934760093689, |
|
"eval_runtime": 210.0979, |
|
"eval_samples_per_second": 23.603, |
|
"eval_steps_per_second": 2.951, |
|
"eval_wer": 0.9979834644081468, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.563829787234042, |
|
"eval_cer": 0.2522914085371335, |
|
"eval_loss": 0.7144805788993835, |
|
"eval_runtime": 209.8962, |
|
"eval_samples_per_second": 23.626, |
|
"eval_steps_per_second": 2.954, |
|
"eval_wer": 0.9975801572897761, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 13.829787234042554, |
|
"eval_cer": 0.24809358718296481, |
|
"eval_loss": 0.6851741075515747, |
|
"eval_runtime": 209.9795, |
|
"eval_samples_per_second": 23.617, |
|
"eval_steps_per_second": 2.953, |
|
"eval_wer": 0.9975801572897761, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 14.095744680851064, |
|
"eval_cer": 0.24873068669947274, |
|
"eval_loss": 0.6617541313171387, |
|
"eval_runtime": 215.3335, |
|
"eval_samples_per_second": 23.029, |
|
"eval_steps_per_second": 2.879, |
|
"eval_wer": 0.9979834644081468, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 14.361702127659575, |
|
"eval_cer": 0.2476675513099626, |
|
"eval_loss": 0.6399756073951721, |
|
"eval_runtime": 210.9211, |
|
"eval_samples_per_second": 23.511, |
|
"eval_steps_per_second": 2.939, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"grad_norm": 3.613203763961792, |
|
"learning_rate": 1.31904e-05, |
|
"loss": 0.6568, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"eval_cer": 0.2448690037405168, |
|
"eval_loss": 0.6200099587440491, |
|
"eval_runtime": 208.8016, |
|
"eval_samples_per_second": 23.75, |
|
"eval_steps_per_second": 2.969, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.893617021276595, |
|
"eval_cer": 0.2420548218271076, |
|
"eval_loss": 0.6031706929206848, |
|
"eval_runtime": 208.8818, |
|
"eval_samples_per_second": 23.741, |
|
"eval_steps_per_second": 2.968, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 15.159574468085106, |
|
"eval_cer": 0.2395025151750851, |
|
"eval_loss": 0.5874983668327332, |
|
"eval_runtime": 215.2428, |
|
"eval_samples_per_second": 23.039, |
|
"eval_steps_per_second": 2.88, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 15.425531914893616, |
|
"eval_cer": 0.24089788037381715, |
|
"eval_loss": 0.5776127576828003, |
|
"eval_runtime": 208.795, |
|
"eval_samples_per_second": 23.751, |
|
"eval_steps_per_second": 2.969, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 15.691489361702128, |
|
"eval_cer": 0.23602778222922294, |
|
"eval_loss": 0.5617058873176575, |
|
"eval_runtime": 209.2365, |
|
"eval_samples_per_second": 23.7, |
|
"eval_steps_per_second": 2.963, |
|
"eval_wer": 0.999395039322444, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"grad_norm": 2.9839932918548584, |
|
"learning_rate": 1.43904e-05, |
|
"loss": 0.548, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"eval_cer": 0.23474576602422542, |
|
"eval_loss": 0.5485217571258545, |
|
"eval_runtime": 212.04, |
|
"eval_samples_per_second": 23.387, |
|
"eval_steps_per_second": 2.924, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.22340425531915, |
|
"eval_cer": 0.23343248113130113, |
|
"eval_loss": 0.5394352078437805, |
|
"eval_runtime": 220.2211, |
|
"eval_samples_per_second": 22.518, |
|
"eval_steps_per_second": 2.815, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 16.48936170212766, |
|
"eval_cer": 0.23171270329532884, |
|
"eval_loss": 0.5321902632713318, |
|
"eval_runtime": 221.3122, |
|
"eval_samples_per_second": 22.407, |
|
"eval_steps_per_second": 2.801, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 16.75531914893617, |
|
"eval_cer": 0.23198630431468809, |
|
"eval_loss": 0.5243075489997864, |
|
"eval_runtime": 210.0529, |
|
"eval_samples_per_second": 23.608, |
|
"eval_steps_per_second": 2.952, |
|
"eval_wer": 0.9969751966122202, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 17.02127659574468, |
|
"eval_cer": 0.22724518950779177, |
|
"eval_loss": 0.5120757222175598, |
|
"eval_runtime": 207.2228, |
|
"eval_samples_per_second": 23.931, |
|
"eval_steps_per_second": 2.992, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"grad_norm": 2.14619779586792, |
|
"learning_rate": 1.5590400000000002e-05, |
|
"loss": 0.4681, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"eval_cer": 0.22661981574925638, |
|
"eval_loss": 0.5070440173149109, |
|
"eval_runtime": 221.9297, |
|
"eval_samples_per_second": 22.345, |
|
"eval_steps_per_second": 2.794, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.5531914893617, |
|
"eval_cer": 0.22627976876805278, |
|
"eval_loss": 0.5013606548309326, |
|
"eval_runtime": 207.1035, |
|
"eval_samples_per_second": 23.945, |
|
"eval_steps_per_second": 2.994, |
|
"eval_wer": 0.9991933857632587, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 17.819148936170212, |
|
"eval_cer": 0.2241613151610142, |
|
"eval_loss": 0.4942595660686493, |
|
"eval_runtime": 207.4255, |
|
"eval_samples_per_second": 23.907, |
|
"eval_steps_per_second": 2.989, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 18.085106382978722, |
|
"eval_cer": 0.2228128529941723, |
|
"eval_loss": 0.4929651618003845, |
|
"eval_runtime": 209.5569, |
|
"eval_samples_per_second": 23.664, |
|
"eval_steps_per_second": 2.959, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 18.351063829787233, |
|
"eval_cer": 0.2245013621422178, |
|
"eval_loss": 0.4968652129173279, |
|
"eval_runtime": 229.2892, |
|
"eval_samples_per_second": 21.628, |
|
"eval_steps_per_second": 2.704, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 18.617021276595743, |
|
"grad_norm": 3.0399532318115234, |
|
"learning_rate": 1.6790399999999998e-05, |
|
"loss": 0.4198, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.617021276595743, |
|
"eval_cer": 0.22252752621684052, |
|
"eval_loss": 0.48828795552253723, |
|
"eval_runtime": 211.562, |
|
"eval_samples_per_second": 23.44, |
|
"eval_steps_per_second": 2.931, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.882978723404257, |
|
"eval_cer": 0.22148784234327548, |
|
"eval_loss": 0.4804530441761017, |
|
"eval_runtime": 212.7369, |
|
"eval_samples_per_second": 23.31, |
|
"eval_steps_per_second": 2.914, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 19.148936170212767, |
|
"eval_cer": 0.22081556555284995, |
|
"eval_loss": 0.47766780853271484, |
|
"eval_runtime": 210.7245, |
|
"eval_samples_per_second": 23.533, |
|
"eval_steps_per_second": 2.942, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 19.414893617021278, |
|
"eval_cer": 0.22092500596059364, |
|
"eval_loss": 0.47176772356033325, |
|
"eval_runtime": 209.4286, |
|
"eval_samples_per_second": 23.679, |
|
"eval_steps_per_second": 2.96, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 19.680851063829788, |
|
"eval_cer": 0.21988532208702857, |
|
"eval_loss": 0.47212347388267517, |
|
"eval_runtime": 211.3146, |
|
"eval_samples_per_second": 23.467, |
|
"eval_steps_per_second": 2.934, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 19.9468085106383, |
|
"grad_norm": 2.2681851387023926, |
|
"learning_rate": 1.79904e-05, |
|
"loss": 0.3795, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.9468085106383, |
|
"eval_cer": 0.22045206705570125, |
|
"eval_loss": 0.4674960672855377, |
|
"eval_runtime": 211.1631, |
|
"eval_samples_per_second": 23.484, |
|
"eval_steps_per_second": 2.936, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 20.21276595744681, |
|
"eval_cer": 0.21619561691166986, |
|
"eval_loss": 0.46922406554222107, |
|
"eval_runtime": 210.3069, |
|
"eval_samples_per_second": 23.58, |
|
"eval_steps_per_second": 2.948, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 20.47872340425532, |
|
"eval_cer": 0.21727829523113423, |
|
"eval_loss": 0.47320201992988586, |
|
"eval_runtime": 208.0889, |
|
"eval_samples_per_second": 23.831, |
|
"eval_steps_per_second": 2.979, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 20.74468085106383, |
|
"eval_cer": 0.217309563919061, |
|
"eval_loss": 0.46535801887512207, |
|
"eval_runtime": 208.908, |
|
"eval_samples_per_second": 23.738, |
|
"eval_steps_per_second": 2.968, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 21.01063829787234, |
|
"eval_cer": 0.21583211841452118, |
|
"eval_loss": 0.4556586742401123, |
|
"eval_runtime": 207.993, |
|
"eval_samples_per_second": 23.842, |
|
"eval_steps_per_second": 2.981, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"grad_norm": 1.6410722732543945, |
|
"learning_rate": 1.9190400000000002e-05, |
|
"loss": 0.3504, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"eval_cer": 0.21436249008196304, |
|
"eval_loss": 0.4562208950519562, |
|
"eval_runtime": 209.2953, |
|
"eval_samples_per_second": 23.694, |
|
"eval_steps_per_second": 2.962, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.54255319148936, |
|
"eval_cer": 0.2143742158399356, |
|
"eval_loss": 0.4679461419582367, |
|
"eval_runtime": 207.9541, |
|
"eval_samples_per_second": 23.847, |
|
"eval_steps_per_second": 2.981, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 21.80851063829787, |
|
"eval_cer": 0.21689525380403132, |
|
"eval_loss": 0.45842525362968445, |
|
"eval_runtime": 208.594, |
|
"eval_samples_per_second": 23.773, |
|
"eval_steps_per_second": 2.972, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 22.074468085106382, |
|
"eval_cer": 0.2133853435842515, |
|
"eval_loss": 0.45605719089508057, |
|
"eval_runtime": 209.0293, |
|
"eval_samples_per_second": 23.724, |
|
"eval_steps_per_second": 2.966, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 22.340425531914892, |
|
"eval_cer": 0.21432731280804543, |
|
"eval_loss": 0.45950454473495483, |
|
"eval_runtime": 208.8441, |
|
"eval_samples_per_second": 23.745, |
|
"eval_steps_per_second": 2.969, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 22.606382978723403, |
|
"grad_norm": 2.5605006217956543, |
|
"learning_rate": 2.03904e-05, |
|
"loss": 0.3134, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.606382978723403, |
|
"eval_cer": 0.21549988860529926, |
|
"eval_loss": 0.4543595314025879, |
|
"eval_runtime": 207.7389, |
|
"eval_samples_per_second": 23.871, |
|
"eval_steps_per_second": 2.985, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.872340425531917, |
|
"eval_cer": 0.21343615520213252, |
|
"eval_loss": 0.4543818533420563, |
|
"eval_runtime": 210.3188, |
|
"eval_samples_per_second": 23.578, |
|
"eval_steps_per_second": 2.948, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 23.138297872340427, |
|
"eval_cer": 0.21289286174940492, |
|
"eval_loss": 0.45519956946372986, |
|
"eval_runtime": 210.0976, |
|
"eval_samples_per_second": 23.603, |
|
"eval_steps_per_second": 2.951, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 23.404255319148938, |
|
"eval_cer": 0.21207987586330893, |
|
"eval_loss": 0.45236727595329285, |
|
"eval_runtime": 209.5064, |
|
"eval_samples_per_second": 23.67, |
|
"eval_steps_per_second": 2.959, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 23.670212765957448, |
|
"eval_cer": 0.21132551876707564, |
|
"eval_loss": 0.45537006855010986, |
|
"eval_runtime": 209.1429, |
|
"eval_samples_per_second": 23.711, |
|
"eval_steps_per_second": 2.964, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 23.93617021276596, |
|
"grad_norm": 1.3679530620574951, |
|
"learning_rate": 2.15904e-05, |
|
"loss": 0.3014, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 23.93617021276596, |
|
"eval_cer": 0.21025847479157464, |
|
"eval_loss": 0.46165505051612854, |
|
"eval_runtime": 209.5827, |
|
"eval_samples_per_second": 23.661, |
|
"eval_steps_per_second": 2.958, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 24.20212765957447, |
|
"eval_cer": 0.21297103346922183, |
|
"eval_loss": 0.4606345295906067, |
|
"eval_runtime": 209.6362, |
|
"eval_samples_per_second": 23.655, |
|
"eval_steps_per_second": 2.958, |
|
"eval_wer": 0.9977818108489614, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 24.46808510638298, |
|
"eval_cer": 0.21053598439692472, |
|
"eval_loss": 0.4560868740081787, |
|
"eval_runtime": 209.0063, |
|
"eval_samples_per_second": 23.727, |
|
"eval_steps_per_second": 2.966, |
|
"eval_wer": 0.9973785037305909, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 24.73404255319149, |
|
"eval_cer": 0.20888656110878767, |
|
"eval_loss": 0.45663759112358093, |
|
"eval_runtime": 211.6105, |
|
"eval_samples_per_second": 23.435, |
|
"eval_steps_per_second": 2.93, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.21188444656376662, |
|
"eval_loss": 0.4485580623149872, |
|
"eval_runtime": 212.8182, |
|
"eval_samples_per_second": 23.302, |
|
"eval_steps_per_second": 2.913, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 25.26595744680851, |
|
"grad_norm": 9.48050594329834, |
|
"learning_rate": 2.27904e-05, |
|
"loss": 0.2791, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 25.26595744680851, |
|
"eval_cer": 0.21173982888210532, |
|
"eval_loss": 0.4541599452495575, |
|
"eval_runtime": 212.1403, |
|
"eval_samples_per_second": 23.376, |
|
"eval_steps_per_second": 2.923, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 25.53191489361702, |
|
"eval_cer": 0.20951193486732306, |
|
"eval_loss": 0.4539582133293152, |
|
"eval_runtime": 211.4226, |
|
"eval_samples_per_second": 23.455, |
|
"eval_steps_per_second": 2.933, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 25.79787234042553, |
|
"eval_cer": 0.20911325909625675, |
|
"eval_loss": 0.441910982131958, |
|
"eval_runtime": 213.904, |
|
"eval_samples_per_second": 23.183, |
|
"eval_steps_per_second": 2.898, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 26.06382978723404, |
|
"eval_cer": 0.20742474994821122, |
|
"eval_loss": 0.45685461163520813, |
|
"eval_runtime": 212.9009, |
|
"eval_samples_per_second": 23.293, |
|
"eval_steps_per_second": 2.912, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 26.329787234042552, |
|
"eval_cer": 0.20901163586049476, |
|
"eval_loss": 0.45425865054130554, |
|
"eval_runtime": 210.5454, |
|
"eval_samples_per_second": 23.553, |
|
"eval_steps_per_second": 2.945, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 26.595744680851062, |
|
"grad_norm": 2.0923683643341064, |
|
"learning_rate": 2.39904e-05, |
|
"loss": 0.2564, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 26.595744680851062, |
|
"eval_cer": 0.20882402373293413, |
|
"eval_loss": 0.4689125120639801, |
|
"eval_runtime": 211.8844, |
|
"eval_samples_per_second": 23.404, |
|
"eval_steps_per_second": 2.926, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 26.861702127659573, |
|
"eval_cer": 0.20887483535081514, |
|
"eval_loss": 0.45904749631881714, |
|
"eval_runtime": 213.2034, |
|
"eval_samples_per_second": 23.259, |
|
"eval_steps_per_second": 2.908, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 27.127659574468087, |
|
"eval_cer": 0.2093282313257533, |
|
"eval_loss": 0.4985686242580414, |
|
"eval_runtime": 211.4066, |
|
"eval_samples_per_second": 23.457, |
|
"eval_steps_per_second": 2.933, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 27.393617021276597, |
|
"eval_cer": 0.20998096518622458, |
|
"eval_loss": 0.46933168172836304, |
|
"eval_runtime": 210.8303, |
|
"eval_samples_per_second": 23.521, |
|
"eval_steps_per_second": 2.941, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 27.659574468085108, |
|
"eval_cer": 0.208472250993758, |
|
"eval_loss": 0.5127565860748291, |
|
"eval_runtime": 210.3509, |
|
"eval_samples_per_second": 23.575, |
|
"eval_steps_per_second": 2.947, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 27.925531914893618, |
|
"grad_norm": 4.9774322509765625, |
|
"learning_rate": 2.51904e-05, |
|
"loss": 0.2449, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 27.925531914893618, |
|
"eval_cer": 0.20994578791230697, |
|
"eval_loss": 0.4511733651161194, |
|
"eval_runtime": 213.3431, |
|
"eval_samples_per_second": 23.244, |
|
"eval_steps_per_second": 2.906, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 28.19148936170213, |
|
"eval_cer": 0.20911325909625675, |
|
"eval_loss": 0.4650718867778778, |
|
"eval_runtime": 212.7967, |
|
"eval_samples_per_second": 23.304, |
|
"eval_steps_per_second": 2.914, |
|
"eval_wer": 0.999395039322444, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 28.45744680851064, |
|
"eval_cer": 0.206795467603685, |
|
"eval_loss": 0.46036314964294434, |
|
"eval_runtime": 211.0991, |
|
"eval_samples_per_second": 23.491, |
|
"eval_steps_per_second": 2.937, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 28.72340425531915, |
|
"eval_cer": 0.20801885501881984, |
|
"eval_loss": 0.46872854232788086, |
|
"eval_runtime": 211.1242, |
|
"eval_samples_per_second": 23.489, |
|
"eval_steps_per_second": 2.937, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 28.98936170212766, |
|
"eval_cer": 0.20636943173068278, |
|
"eval_loss": 0.468840092420578, |
|
"eval_runtime": 212.878, |
|
"eval_samples_per_second": 23.295, |
|
"eval_steps_per_second": 2.912, |
|
"eval_wer": 0.999395039322444, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 29.25531914893617, |
|
"grad_norm": 4.162810802459717, |
|
"learning_rate": 2.63904e-05, |
|
"loss": 0.2258, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 29.25531914893617, |
|
"eval_cer": 0.20923051667598214, |
|
"eval_loss": 0.4758950173854828, |
|
"eval_runtime": 212.8392, |
|
"eval_samples_per_second": 23.299, |
|
"eval_steps_per_second": 2.913, |
|
"eval_wer": 0.999395039322444, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 29.52127659574468, |
|
"eval_cer": 0.20683064487760264, |
|
"eval_loss": 0.4816320836544037, |
|
"eval_runtime": 213.093, |
|
"eval_samples_per_second": 23.272, |
|
"eval_steps_per_second": 2.91, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 29.78723404255319, |
|
"eval_cer": 0.20534147361509028, |
|
"eval_loss": 0.4749976098537445, |
|
"eval_runtime": 212.0731, |
|
"eval_samples_per_second": 23.383, |
|
"eval_steps_per_second": 2.924, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 30.0531914893617, |
|
"eval_cer": 0.2047864544043901, |
|
"eval_loss": 0.47532856464385986, |
|
"eval_runtime": 213.0921, |
|
"eval_samples_per_second": 23.272, |
|
"eval_steps_per_second": 2.91, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 30.319148936170212, |
|
"eval_cer": 0.2059551216156531, |
|
"eval_loss": 0.48292842507362366, |
|
"eval_runtime": 212.9065, |
|
"eval_samples_per_second": 23.292, |
|
"eval_steps_per_second": 2.912, |
|
"eval_wer": 0.9991933857632587, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 30.585106382978722, |
|
"grad_norm": 2.1135425567626953, |
|
"learning_rate": 2.7590400000000003e-05, |
|
"loss": 0.2124, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 30.585106382978722, |
|
"eval_cer": 0.20811266108260015, |
|
"eval_loss": 0.4800090193748474, |
|
"eval_runtime": 215.1825, |
|
"eval_samples_per_second": 23.046, |
|
"eval_steps_per_second": 2.881, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 30.851063829787233, |
|
"eval_cer": 0.20605283626542426, |
|
"eval_loss": 0.5290209054946899, |
|
"eval_runtime": 213.4725, |
|
"eval_samples_per_second": 23.23, |
|
"eval_steps_per_second": 2.904, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 31.117021276595743, |
|
"eval_cer": 0.20547045695278818, |
|
"eval_loss": 0.5368531942367554, |
|
"eval_runtime": 214.9789, |
|
"eval_samples_per_second": 23.067, |
|
"eval_steps_per_second": 2.884, |
|
"eval_wer": 0.9987900786448881, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 31.382978723404257, |
|
"eval_cer": 0.2040555488241019, |
|
"eval_loss": 0.5169620513916016, |
|
"eval_runtime": 214.7111, |
|
"eval_samples_per_second": 23.096, |
|
"eval_steps_per_second": 2.888, |
|
"eval_wer": 0.9977818108489614, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 31.648936170212767, |
|
"eval_cer": 0.20704170852110831, |
|
"eval_loss": 0.5228501558303833, |
|
"eval_runtime": 216.5107, |
|
"eval_samples_per_second": 22.904, |
|
"eval_steps_per_second": 2.864, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 31.914893617021278, |
|
"grad_norm": 4.3140645027160645, |
|
"learning_rate": 2.87904e-05, |
|
"loss": 0.2007, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 31.914893617021278, |
|
"eval_cer": 0.20597857313159817, |
|
"eval_loss": 0.5034534931182861, |
|
"eval_runtime": 215.0115, |
|
"eval_samples_per_second": 23.064, |
|
"eval_steps_per_second": 2.884, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 32.180851063829785, |
|
"eval_cer": 0.20488807764015213, |
|
"eval_loss": 0.5103185772895813, |
|
"eval_runtime": 214.9574, |
|
"eval_samples_per_second": 23.07, |
|
"eval_steps_per_second": 2.884, |
|
"eval_wer": 0.9973785037305909, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 32.4468085106383, |
|
"eval_cer": 0.2032073856640883, |
|
"eval_loss": 0.48684972524642944, |
|
"eval_runtime": 215.7924, |
|
"eval_samples_per_second": 22.98, |
|
"eval_steps_per_second": 2.873, |
|
"eval_wer": 0.9971768501714056, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 32.712765957446805, |
|
"eval_cer": 0.2042666124676076, |
|
"eval_loss": 0.4866673946380615, |
|
"eval_runtime": 215.5038, |
|
"eval_samples_per_second": 23.011, |
|
"eval_steps_per_second": 2.877, |
|
"eval_wer": 0.9995966928816293, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 32.97872340425532, |
|
"eval_cer": 0.20395001700234905, |
|
"eval_loss": 0.5048738718032837, |
|
"eval_runtime": 220.2273, |
|
"eval_samples_per_second": 22.518, |
|
"eval_steps_per_second": 2.815, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 33.244680851063826, |
|
"grad_norm": 3.023064613342285, |
|
"learning_rate": 2.99904e-05, |
|
"loss": 0.1867, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 33.244680851063826, |
|
"eval_cer": 0.20403600589414767, |
|
"eval_loss": 0.5125642418861389, |
|
"eval_runtime": 217.1156, |
|
"eval_samples_per_second": 22.84, |
|
"eval_steps_per_second": 2.856, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 33.51063829787234, |
|
"eval_cer": 0.20373895335884337, |
|
"eval_loss": 0.5321324467658997, |
|
"eval_runtime": 218.7742, |
|
"eval_samples_per_second": 22.667, |
|
"eval_steps_per_second": 2.834, |
|
"eval_wer": 0.9991933857632587, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 33.776595744680854, |
|
"eval_cer": 0.20402428013617513, |
|
"eval_loss": 0.5186927914619446, |
|
"eval_runtime": 216.188, |
|
"eval_samples_per_second": 22.938, |
|
"eval_steps_per_second": 2.868, |
|
"eval_wer": 0.9977818108489614, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 34.04255319148936, |
|
"eval_cer": 0.20636943173068278, |
|
"eval_loss": 0.5318589806556702, |
|
"eval_runtime": 217.8562, |
|
"eval_samples_per_second": 22.763, |
|
"eval_steps_per_second": 2.846, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 34.308510638297875, |
|
"eval_cer": 0.2040711831680653, |
|
"eval_loss": 0.5274991989135742, |
|
"eval_runtime": 218.7876, |
|
"eval_samples_per_second": 22.666, |
|
"eval_steps_per_second": 2.834, |
|
"eval_wer": 0.9979834644081468, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 34.57446808510638, |
|
"grad_norm": 4.690342426300049, |
|
"learning_rate": 2.7264773627685888e-05, |
|
"loss": 0.1749, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 34.57446808510638, |
|
"eval_cer": 0.20429788115553435, |
|
"eval_loss": 0.543329656124115, |
|
"eval_runtime": 215.6846, |
|
"eval_samples_per_second": 22.992, |
|
"eval_steps_per_second": 2.875, |
|
"eval_wer": 0.9981851179673321, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 34.840425531914896, |
|
"eval_cer": 0.2023240452301571, |
|
"eval_loss": 0.5093534588813782, |
|
"eval_runtime": 216.6521, |
|
"eval_samples_per_second": 22.889, |
|
"eval_steps_per_second": 2.862, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 35.1063829787234, |
|
"eval_cer": 0.20042838102459673, |
|
"eval_loss": 0.5363013744354248, |
|
"eval_runtime": 216.3501, |
|
"eval_samples_per_second": 22.921, |
|
"eval_steps_per_second": 2.866, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 35.37234042553192, |
|
"eval_cer": 0.20222633058038594, |
|
"eval_loss": 0.5330685377120972, |
|
"eval_runtime": 215.7086, |
|
"eval_samples_per_second": 22.989, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 0.999395039322444, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 35.638297872340424, |
|
"eval_cer": 0.20093258861741586, |
|
"eval_loss": 0.5052955746650696, |
|
"eval_runtime": 213.6416, |
|
"eval_samples_per_second": 23.212, |
|
"eval_steps_per_second": 2.902, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 35.90425531914894, |
|
"grad_norm": 3.094069242477417, |
|
"learning_rate": 1.9986694366624682e-05, |
|
"loss": 0.1604, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 35.90425531914894, |
|
"eval_cer": 0.2026210977654614, |
|
"eval_loss": 0.5156988501548767, |
|
"eval_runtime": 221.1798, |
|
"eval_samples_per_second": 22.421, |
|
"eval_steps_per_second": 2.803, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 36.170212765957444, |
|
"eval_cer": 0.20183938056729217, |
|
"eval_loss": 0.5299074649810791, |
|
"eval_runtime": 215.0871, |
|
"eval_samples_per_second": 23.056, |
|
"eval_steps_per_second": 2.883, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 36.43617021276596, |
|
"eval_cer": 0.20501706097785005, |
|
"eval_loss": 0.5117177963256836, |
|
"eval_runtime": 214.4648, |
|
"eval_samples_per_second": 23.123, |
|
"eval_steps_per_second": 2.891, |
|
"eval_wer": 0.9995966928816293, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 36.702127659574465, |
|
"eval_cer": 0.2038327594226237, |
|
"eval_loss": 0.5067043900489807, |
|
"eval_runtime": 213.8511, |
|
"eval_samples_per_second": 23.189, |
|
"eval_steps_per_second": 2.899, |
|
"eval_wer": 0.999395039322444, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 36.96808510638298, |
|
"eval_cer": 0.20278525837707692, |
|
"eval_loss": 0.4994255602359772, |
|
"eval_runtime": 216.1066, |
|
"eval_samples_per_second": 22.947, |
|
"eval_steps_per_second": 2.869, |
|
"eval_wer": 0.9995966928816293, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 37.234042553191486, |
|
"grad_norm": 2.801684617996216, |
|
"learning_rate": 1.089715454232877e-05, |
|
"loss": 0.1412, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 37.234042553191486, |
|
"eval_cer": 0.20241394270794655, |
|
"eval_loss": 0.5346146821975708, |
|
"eval_runtime": 215.4888, |
|
"eval_samples_per_second": 23.013, |
|
"eval_steps_per_second": 2.877, |
|
"eval_wer": 0.9983867715265174, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"eval_cer": 0.20152669368802448, |
|
"eval_loss": 0.5349961519241333, |
|
"eval_runtime": 213.0582, |
|
"eval_samples_per_second": 23.275, |
|
"eval_steps_per_second": 2.91, |
|
"eval_wer": 0.999395039322444, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 37.765957446808514, |
|
"eval_cer": 0.20097949164930604, |
|
"eval_loss": 0.5236727595329285, |
|
"eval_runtime": 211.9375, |
|
"eval_samples_per_second": 23.398, |
|
"eval_steps_per_second": 2.925, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 38.03191489361702, |
|
"eval_cer": 0.19928316532927884, |
|
"eval_loss": 0.5304979085922241, |
|
"eval_runtime": 212.7349, |
|
"eval_samples_per_second": 23.311, |
|
"eval_steps_per_second": 2.914, |
|
"eval_wer": 0.9991933857632587, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 38.297872340425535, |
|
"eval_cer": 0.19734059809182833, |
|
"eval_loss": 0.5309327244758606, |
|
"eval_runtime": 214.8976, |
|
"eval_samples_per_second": 23.076, |
|
"eval_steps_per_second": 2.885, |
|
"eval_wer": 0.9985884250857028, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 38.56382978723404, |
|
"grad_norm": 1.7099072933197021, |
|
"learning_rate": 3.292383984744315e-06, |
|
"loss": 0.1286, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 38.56382978723404, |
|
"eval_cer": 0.19924407946937037, |
|
"eval_loss": 0.5269559621810913, |
|
"eval_runtime": 214.5389, |
|
"eval_samples_per_second": 23.115, |
|
"eval_steps_per_second": 2.89, |
|
"eval_wer": 0.9991933857632587, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 38.829787234042556, |
|
"eval_cer": 0.19992026484578673, |
|
"eval_loss": 0.5362734198570251, |
|
"eval_runtime": 221.7091, |
|
"eval_samples_per_second": 22.367, |
|
"eval_steps_per_second": 2.796, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 39.09574468085106, |
|
"eval_cer": 0.19989681332984166, |
|
"eval_loss": 0.5347363352775574, |
|
"eval_runtime": 213.4871, |
|
"eval_samples_per_second": 23.229, |
|
"eval_steps_per_second": 2.904, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 39.361702127659576, |
|
"eval_cer": 0.19991244767380506, |
|
"eval_loss": 0.5318508744239807, |
|
"eval_runtime": 214.7174, |
|
"eval_samples_per_second": 23.095, |
|
"eval_steps_per_second": 2.888, |
|
"eval_wer": 0.9989917322040734, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 39.62765957446808, |
|
"eval_cer": 0.1994590516988669, |
|
"eval_loss": 0.5322226881980896, |
|
"eval_runtime": 213.8602, |
|
"eval_samples_per_second": 23.188, |
|
"eval_steps_per_second": 2.899, |
|
"eval_wer": 0.999395039322444, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 39.8936170212766, |
|
"grad_norm": 2.6511354446411133, |
|
"learning_rate": 2.4271246633469913e-08, |
|
"loss": 0.1217, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 39.8936170212766, |
|
"eval_cer": 0.19924017088337953, |
|
"eval_loss": 0.5321789383888245, |
|
"eval_runtime": 215.7329, |
|
"eval_samples_per_second": 22.987, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 0.9991933857632587, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 15040, |
|
"total_flos": 2.1018408189762245e+19, |
|
"train_loss": 1.5314149432993949, |
|
"train_runtime": 82631.5291, |
|
"train_samples_per_second": 5.824, |
|
"train_steps_per_second": 0.182 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 15040, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1018408189762245e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |