|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.147606696533837,
  "eval_steps": 2000,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24,
      "learning_rate": 0.00014879999999999998,
      "loss": 15.1742,
      "step": 500
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0002985,
      "loss": 3.8658,
      "step": 1000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0002998088484992574,
      "loss": 1.8845,
      "step": 1500
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.0002992281499624614,
      "loss": 1.54,
      "step": 2000
    },
    {
      "epoch": 0.94,
      "eval_cer": 0.26165714958754477,
      "eval_loss": 1.005660891532898,
      "eval_runtime": 2601.6674,
      "eval_samples_per_second": 6.501,
      "eval_steps_per_second": 0.407,
      "eval_wer": 0.6134988781327054,
      "step": 2000
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.0002982593949446869,
      "loss": 1.3671,
      "step": 2500
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.0002969051026198513,
      "loss": 1.2849,
      "step": 3000
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.00029517264575296907,
      "loss": 1.2354,
      "step": 3500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.00029305958725988945,
      "loss": 1.1895,
      "step": 4000
    },
    {
      "epoch": 1.89,
      "eval_cer": 0.20404210489580651,
      "eval_loss": 0.7781721949577332,
      "eval_runtime": 2606.5317,
      "eval_samples_per_second": 6.489,
      "eval_steps_per_second": 0.406,
      "eval_wer": 0.5034607425730799,
      "step": 4000
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.0002905745131650867,
      "loss": 1.1348,
      "step": 4500
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.0002877238857151841,
      "loss": 1.0801,
      "step": 5000
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.00028452188731761346,
      "loss": 1.0722,
      "step": 5500
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.00028096401365270297,
      "loss": 1.0582,
      "step": 6000
    },
    {
      "epoch": 2.83,
      "eval_cer": 0.18262104285084868,
      "eval_loss": 0.6766561269760132,
      "eval_runtime": 2575.8771,
      "eval_samples_per_second": 6.566,
      "eval_steps_per_second": 0.411,
      "eval_wer": 0.4655154643262057,
      "step": 6000
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.0002770655780074579,
      "loss": 1.0283,
      "step": 6500
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.0002728367179679716,
      "loss": 0.9818,
      "step": 7000
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.0002682884303639032,
      "loss": 0.972,
      "step": 7500
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.0002634425531692912,
      "loss": 0.9586,
      "step": 8000
    },
    {
      "epoch": 3.77,
      "eval_cer": 0.1690433917735793,
      "eval_loss": 0.6272980570793152,
      "eval_runtime": 2527.907,
      "eval_samples_per_second": 6.691,
      "eval_steps_per_second": 0.419,
      "eval_wer": 0.43799601994783116,
      "step": 8000
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.00025829226958497964,
      "loss": 0.9543,
      "step": 8500
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.0002528603801705155,
      "loss": 0.9046,
      "step": 9000
    },
    {
      "epoch": 4.48,
      "learning_rate": 0.00024716101014216497,
      "loss": 0.8973,
      "step": 9500
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.00024122112631130689,
      "loss": 0.8831,
      "step": 10000
    },
    {
      "epoch": 4.72,
      "eval_cer": 0.15516692331517162,
      "eval_loss": 0.5883975028991699,
      "eval_runtime": 2528.7031,
      "eval_samples_per_second": 6.689,
      "eval_steps_per_second": 0.418,
      "eval_wer": 0.40706736753641115,
      "step": 10000
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.00023503237291623731,
      "loss": 0.8808,
      "step": 10500
    },
    {
      "epoch": 5.19,
      "learning_rate": 0.00022862249928740867,
      "loss": 0.8442,
      "step": 11000
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.00022202159516049143,
      "loss": 0.8334,
      "step": 11500
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.0002152203749023848,
      "loss": 0.8318,
      "step": 12000
    },
    {
      "epoch": 5.66,
      "eval_cer": 0.14686983340385715,
      "eval_loss": 0.5510314106941223,
      "eval_runtime": 2595.7595,
      "eval_samples_per_second": 6.516,
      "eval_steps_per_second": 0.408,
      "eval_wer": 0.3897438336940751,
      "step": 12000
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.0002082495540080772,
      "loss": 0.8242,
      "step": 12500
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.00020112725956837873,
      "loss": 0.7965,
      "step": 13000
    },
    {
      "epoch": 6.37,
      "learning_rate": 0.00019387201256956072,
      "loss": 0.7776,
      "step": 13500
    },
    {
      "epoch": 6.6,
      "learning_rate": 0.0001865026797309365,
      "loss": 0.7725,
      "step": 14000
    },
    {
      "epoch": 6.6,
      "eval_cer": 0.1407080881358594,
      "eval_loss": 0.5327238440513611,
      "eval_runtime": 2579.4825,
      "eval_samples_per_second": 6.557,
      "eval_steps_per_second": 0.41,
      "eval_wer": 0.3726026528395532,
      "step": 14000
    },
    {
      "epoch": 6.84,
      "learning_rate": 0.0001790534348212884,
      "loss": 0.7775,
      "step": 14500
    },
    {
      "epoch": 7.07,
      "learning_rate": 0.00017152894049650538,
      "loss": 0.7543,
      "step": 15000
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.0001639334517198862,
      "loss": 0.7284,
      "step": 15500
    },
    {
      "epoch": 7.55,
      "learning_rate": 0.00015630173005929936,
      "loss": 0.7254,
      "step": 16000
    },
    {
      "epoch": 7.55,
      "eval_cer": 0.1416372575107858,
      "eval_loss": 0.5081394910812378,
      "eval_runtime": 2577.7581,
      "eval_samples_per_second": 6.562,
      "eval_steps_per_second": 0.41,
      "eval_wer": 0.3675760530885046,
      "step": 16000
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.0001486536212278503,
      "loss": 0.7224,
      "step": 16500
    },
    {
      "epoch": 8.02,
      "learning_rate": 0.00014100901355223894,
      "loss": 0.7136,
      "step": 17000
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.00013338778625467495,
      "loss": 0.6742,
      "step": 17500
    },
    {
      "epoch": 8.49,
      "learning_rate": 0.00012580975775846912,
      "loss": 0.6802,
      "step": 18000
    },
    {
      "epoch": 8.49,
      "eval_cer": 0.13128552546054956,
      "eval_loss": 0.4845993220806122,
      "eval_runtime": 2577.5228,
      "eval_samples_per_second": 6.562,
      "eval_steps_per_second": 0.41,
      "eval_wer": 0.350228734073845,
      "step": 18000
    },
    {
      "epoch": 8.72,
      "learning_rate": 0.0001182946341517271,
      "loss": 0.6779,
      "step": 18500
    },
    {
      "epoch": 8.96,
      "learning_rate": 0.00011087672812556355,
      "loss": 0.6727,
      "step": 19000
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.00010354560475282154,
      "loss": 0.6437,
      "step": 19500
    },
    {
      "epoch": 9.43,
      "learning_rate": 9.633528251009509e-05,
      "loss": 0.6386,
      "step": 20000
    },
    {
      "epoch": 9.43,
      "eval_cer": 0.12406592304034689,
      "eval_loss": 0.46761998534202576,
      "eval_runtime": 2525.8593,
      "eval_samples_per_second": 6.696,
      "eval_steps_per_second": 0.419,
      "eval_wer": 0.3343878093063451,
      "step": 20000
    },
    {
      "epoch": 9.67,
      "learning_rate": 8.926451129327824e-05,
      "loss": 0.6291,
      "step": 20500
    },
    {
      "epoch": 9.9,
      "learning_rate": 8.23653341292733e-05,
      "loss": 0.6257,
      "step": 21000
    },
    {
      "epoch": 10.14,
      "learning_rate": 7.562804579791497e-05,
      "loss": 0.608,
      "step": 21500
    },
    {
      "epoch": 10.37,
      "learning_rate": 6.909703945913943e-05,
      "loss": 0.5949,
      "step": 22000
    },
    {
      "epoch": 10.37,
      "eval_cer": 0.11851489942723033,
      "eval_loss": 0.45102670788764954,
      "eval_runtime": 2545.5284,
      "eval_samples_per_second": 6.645,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.3250481649739552,
      "step": 22000
    },
    {
      "epoch": 10.61,
      "learning_rate": 6.276312780656023e-05,
      "loss": 0.5946,
      "step": 22500
    },
    {
      "epoch": 10.85,
      "learning_rate": 5.665606902083949e-05,
      "loss": 0.5947,
      "step": 23000
    },
    {
      "epoch": 11.08,
      "learning_rate": 5.079174404496348e-05,
      "loss": 0.5854,
      "step": 23500
    },
    {
      "epoch": 11.32,
      "learning_rate": 4.519634802070792e-05,
      "loss": 0.5736,
      "step": 24000
    },
    {
      "epoch": 11.32,
      "eval_cer": 0.11609600544414732,
      "eval_loss": 0.4416215717792511,
      "eval_runtime": 2548.5938,
      "eval_samples_per_second": 6.637,
      "eval_steps_per_second": 0.415,
      "eval_wer": 0.31890366212369875,
      "step": 24000
    },
    {
      "epoch": 11.55,
      "learning_rate": 3.9862009784738855e-05,
      "loss": 0.5637,
      "step": 24500
    },
    {
      "epoch": 11.79,
      "learning_rate": 3.4814077035373634e-05,
      "loss": 0.5681,
      "step": 25000
    },
    {
      "epoch": 12.03,
      "learning_rate": 3.0065676538704393e-05,
      "loss": 0.563,
      "step": 25500
    },
    {
      "epoch": 12.26,
      "learning_rate": 2.562915614992792e-05,
      "loss": 0.5451,
      "step": 26000
    },
    {
      "epoch": 12.26,
      "eval_cer": 0.1143270937937593,
      "eval_loss": 0.43376967310905457,
      "eval_runtime": 2588.7798,
      "eval_samples_per_second": 6.534,
      "eval_steps_per_second": 0.409,
      "eval_wer": 0.31443204972686695,
      "step": 26000
    },
    {
      "epoch": 12.5,
      "learning_rate": 2.1523948888423446e-05,
      "loss": 0.5525,
      "step": 26500
    },
    {
      "epoch": 12.73,
      "learning_rate": 1.7744279871280954e-05,
      "loss": 0.5442,
      "step": 27000
    },
    {
      "epoch": 12.97,
      "learning_rate": 1.4308531819153024e-05,
      "loss": 0.5452,
      "step": 27500
    },
    {
      "epoch": 13.2,
      "learning_rate": 1.1225639134154647e-05,
      "loss": 0.5375,
      "step": 28000
    },
    {
      "epoch": 13.2,
      "eval_cer": 0.11264106579653371,
      "eval_loss": 0.4286753833293915,
      "eval_runtime": 2584.5725,
      "eval_samples_per_second": 6.544,
      "eval_steps_per_second": 0.409,
      "eval_wer": 0.30952437583743625,
      "step": 28000
    },
    {
      "epoch": 13.44,
      "learning_rate": 8.508697672362313e-06,
      "loss": 0.5351,
      "step": 28500
    },
    {
      "epoch": 13.68,
      "learning_rate": 6.153885454076635e-06,
      "loss": 0.5365,
      "step": 29000
    },
    {
      "epoch": 13.91,
      "learning_rate": 4.173134142042017e-06,
      "loss": 0.5308,
      "step": 29500
    },
    {
      "epoch": 14.15,
      "learning_rate": 2.5715945297517193e-06,
      "loss": 0.5335,
      "step": 30000
    },
    {
      "epoch": 14.15,
      "eval_cer": 0.11220919834058202,
      "eval_loss": 0.42732492089271545,
      "eval_runtime": 2538.009,
      "eval_samples_per_second": 6.664,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.3079228409009823,
      "step": 30000
    }
  ],
  "logging_steps": 500,
  "max_steps": 31800,
  "num_train_epochs": 15,
  "save_steps": 2000,
  "total_flos": 2.014666123318961e+20,
  "trial_name": null,
  "trial_params": null
}
|
|