|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 11.297071129707113, |
|
"global_step": 10800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 6.6124, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 3.268874168395996, |
|
"eval_runtime": 47.7642, |
|
"eval_samples_per_second": 10.594, |
|
"eval_wer": 0.9499561018437226, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002924547283702213, |
|
"loss": Infinity, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.5890493392944336, |
|
"eval_runtime": 46.9733, |
|
"eval_samples_per_second": 10.772, |
|
"eval_wer": 0.9242025168276266, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0002823943661971831, |
|
"loss": NaN, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 1.399864673614502, |
|
"eval_runtime": 49.6014, |
|
"eval_samples_per_second": 10.201, |
|
"eval_wer": 0.8653789874158618, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00027233400402414484, |
|
"loss": NaN, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 1.1861594915390015, |
|
"eval_runtime": 45.83, |
|
"eval_samples_per_second": 11.041, |
|
"eval_wer": 0.8627450980392157, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0002622736418511066, |
|
"loss": NaN, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 1.1273273229599, |
|
"eval_runtime": 47.5676, |
|
"eval_samples_per_second": 10.638, |
|
"eval_wer": 0.8124085455077553, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0002522132796780684, |
|
"loss": NaN, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 1.0673868656158447, |
|
"eval_runtime": 46.2685, |
|
"eval_samples_per_second": 10.936, |
|
"eval_wer": 0.8141644717588528, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00024215291750503016, |
|
"loss": NaN, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 1.012987732887268, |
|
"eval_runtime": 46.0398, |
|
"eval_samples_per_second": 10.991, |
|
"eval_wer": 0.8112379280070237, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.00023209255533199194, |
|
"loss": NaN, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 1.0372620820999146, |
|
"eval_runtime": 47.1969, |
|
"eval_samples_per_second": 10.721, |
|
"eval_wer": 0.7860696517412935, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00022203219315895372, |
|
"loss": NaN, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 0.9511893391609192, |
|
"eval_runtime": 46.2728, |
|
"eval_samples_per_second": 10.935, |
|
"eval_wer": 0.7960199004975125, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00021197183098591548, |
|
"loss": NaN, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 0.9626357555389404, |
|
"eval_runtime": 46.2615, |
|
"eval_samples_per_second": 10.938, |
|
"eval_wer": 0.7573895229733685, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.00020191146881287726, |
|
"loss": NaN, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_loss": 0.9149179458618164, |
|
"eval_runtime": 47.2032, |
|
"eval_samples_per_second": 10.72, |
|
"eval_wer": 0.755340942347088, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.000191851106639839, |
|
"loss": NaN, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 0.945462703704834, |
|
"eval_runtime": 47.6471, |
|
"eval_samples_per_second": 10.62, |
|
"eval_wer": 0.7515364354697103, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0001817907444668008, |
|
"loss": NaN, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_loss": 0.9152739644050598, |
|
"eval_runtime": 48.1688, |
|
"eval_samples_per_second": 10.505, |
|
"eval_wer": 0.746268656716418, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00017173038229376258, |
|
"loss": NaN, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 0.9364785552024841, |
|
"eval_runtime": 47.6477, |
|
"eval_samples_per_second": 10.62, |
|
"eval_wer": 0.7260755048287972, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.00016167002012072433, |
|
"loss": NaN, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"eval_loss": 0.9098660349845886, |
|
"eval_runtime": 46.4104, |
|
"eval_samples_per_second": 10.903, |
|
"eval_wer": 0.7380743342112964, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.0001516096579476861, |
|
"loss": NaN, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_loss": 0.9048272371292114, |
|
"eval_runtime": 46.2425, |
|
"eval_samples_per_second": 10.942, |
|
"eval_wer": 0.7205150717003219, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.00014154929577464787, |
|
"loss": NaN, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 0.8889923691749573, |
|
"eval_runtime": 46.6074, |
|
"eval_samples_per_second": 10.857, |
|
"eval_wer": 0.7172958735733099, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00013148893360160965, |
|
"loss": NaN, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"eval_loss": 0.870766818523407, |
|
"eval_runtime": 47.1858, |
|
"eval_samples_per_second": 10.724, |
|
"eval_wer": 0.7091015510681885, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.00012142857142857142, |
|
"loss": NaN, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 0.8733641505241394, |
|
"eval_runtime": 47.6751, |
|
"eval_samples_per_second": 10.614, |
|
"eval_wer": 0.6971027216856892, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.00011136820925553318, |
|
"loss": NaN, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"eval_loss": 0.88031005859375, |
|
"eval_runtime": 47.4892, |
|
"eval_samples_per_second": 10.655, |
|
"eval_wer": 0.6947614866842259, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.00010130784708249495, |
|
"loss": NaN, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_loss": 0.8849018216133118, |
|
"eval_runtime": 47.5503, |
|
"eval_samples_per_second": 10.641, |
|
"eval_wer": 0.6930055604331284, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 9.124748490945673e-05, |
|
"loss": NaN, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"eval_loss": 0.8564967513084412, |
|
"eval_runtime": 46.9348, |
|
"eval_samples_per_second": 10.781, |
|
"eval_wer": 0.6903716710564823, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 8.11871227364185e-05, |
|
"loss": NaN, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_loss": 0.875277042388916, |
|
"eval_runtime": 46.126, |
|
"eval_samples_per_second": 10.97, |
|
"eval_wer": 0.6871524729294703, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 7.112676056338028e-05, |
|
"loss": NaN, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 0.836927056312561, |
|
"eval_runtime": 47.5639, |
|
"eval_samples_per_second": 10.638, |
|
"eval_wer": 0.6877377816798361, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 6.106639839034204e-05, |
|
"loss": NaN, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"eval_loss": 0.8403338193893433, |
|
"eval_runtime": 47.7824, |
|
"eval_samples_per_second": 10.59, |
|
"eval_wer": 0.6798361135498976, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 5.100603621730382e-05, |
|
"loss": NaN, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"eval_loss": 0.8528462648391724, |
|
"eval_runtime": 47.7751, |
|
"eval_samples_per_second": 10.591, |
|
"eval_wer": 0.6865671641791045, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.094567404426559e-05, |
|
"loss": NaN, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"eval_loss": 0.8435601592063904, |
|
"eval_runtime": 47.6913, |
|
"eval_samples_per_second": 10.61, |
|
"eval_wer": 0.6777875329236172, |
|
"step": 10800 |
|
} |
|
], |
|
"max_steps": 12428, |
|
"num_train_epochs": 13, |
|
"total_flos": 2.5666394845611536e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|