|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 21.231422505307854, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 2.901623249053955, |
|
"eval_runtime": 78.4215, |
|
"eval_samples_per_second": 35.985, |
|
"eval_steps_per_second": 3.009, |
|
"eval_wer": 0.9995142002989537, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.970000000000001e-05, |
|
"loss": 2.877, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 0.981178343296051, |
|
"eval_runtime": 78.2116, |
|
"eval_samples_per_second": 36.082, |
|
"eval_steps_per_second": 3.017, |
|
"eval_wer": 0.3484678624813154, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 0.784228503704071, |
|
"eval_runtime": 78.8199, |
|
"eval_samples_per_second": 35.803, |
|
"eval_steps_per_second": 2.994, |
|
"eval_wer": 0.273168908819133, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 8.892222222222223e-05, |
|
"loss": 0.7834, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_loss": 0.6962451934814453, |
|
"eval_runtime": 78.959, |
|
"eval_samples_per_second": 35.74, |
|
"eval_steps_per_second": 2.989, |
|
"eval_wer": 0.21917040358744394, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"eval_loss": 0.6526582837104797, |
|
"eval_runtime": 78.1018, |
|
"eval_samples_per_second": 36.132, |
|
"eval_steps_per_second": 3.022, |
|
"eval_wer": 0.20418535127055307, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 7.78111111111111e-05, |
|
"loss": 0.6084, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 0.6220057606697083, |
|
"eval_runtime": 79.4519, |
|
"eval_samples_per_second": 35.518, |
|
"eval_steps_per_second": 2.97, |
|
"eval_wer": 0.19715994020926755, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"eval_loss": 0.6441554427146912, |
|
"eval_runtime": 80.6733, |
|
"eval_samples_per_second": 34.981, |
|
"eval_steps_per_second": 2.925, |
|
"eval_wer": 0.19338565022421525, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 6.670000000000001e-05, |
|
"loss": 0.5147, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 0.6793447732925415, |
|
"eval_runtime": 80.545, |
|
"eval_samples_per_second": 35.036, |
|
"eval_steps_per_second": 2.93, |
|
"eval_wer": 0.19495515695067264, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"eval_loss": 0.6431596875190735, |
|
"eval_runtime": 83.1308, |
|
"eval_samples_per_second": 33.947, |
|
"eval_steps_per_second": 2.839, |
|
"eval_wer": 0.19200298953662182, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 5.558888888888889e-05, |
|
"loss": 0.4566, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"eval_loss": 0.6605134010314941, |
|
"eval_runtime": 80.2378, |
|
"eval_samples_per_second": 35.17, |
|
"eval_steps_per_second": 2.941, |
|
"eval_wer": 0.1853139013452915, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"eval_loss": 0.6392588019371033, |
|
"eval_runtime": 78.2211, |
|
"eval_samples_per_second": 36.077, |
|
"eval_steps_per_second": 3.017, |
|
"eval_wer": 0.18658445440956653, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 4.448888888888889e-05, |
|
"loss": 0.4155, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"eval_loss": 0.6917754411697388, |
|
"eval_runtime": 78.1406, |
|
"eval_samples_per_second": 36.114, |
|
"eval_steps_per_second": 3.02, |
|
"eval_wer": 0.18030642750373693, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"eval_loss": 0.6514243483543396, |
|
"eval_runtime": 78.7289, |
|
"eval_samples_per_second": 35.845, |
|
"eval_steps_per_second": 2.998, |
|
"eval_wer": 0.17911061285500748, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 3.337777777777778e-05, |
|
"loss": 0.372, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"eval_loss": 0.7009950876235962, |
|
"eval_runtime": 78.8286, |
|
"eval_samples_per_second": 35.799, |
|
"eval_steps_per_second": 2.994, |
|
"eval_wer": 0.1851270553064275, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"eval_loss": 0.6824212670326233, |
|
"eval_runtime": 79.8558, |
|
"eval_samples_per_second": 35.339, |
|
"eval_steps_per_second": 2.955, |
|
"eval_wer": 0.17858744394618833, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 2.2277777777777778e-05, |
|
"loss": 0.3368, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_loss": 0.6895105838775635, |
|
"eval_runtime": 81.107, |
|
"eval_samples_per_second": 34.794, |
|
"eval_steps_per_second": 2.91, |
|
"eval_wer": 0.1780269058295964, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 18.05, |
|
"eval_loss": 0.7150452136993408, |
|
"eval_runtime": 79.9454, |
|
"eval_samples_per_second": 35.299, |
|
"eval_steps_per_second": 2.952, |
|
"eval_wer": 0.17589686098654708, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 1.1177777777777779e-05, |
|
"loss": 0.3244, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"eval_loss": 0.7141156792640686, |
|
"eval_runtime": 85.0058, |
|
"eval_samples_per_second": 33.198, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 0.17589686098654708, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"eval_loss": 0.7225230932235718, |
|
"eval_runtime": 77.8748, |
|
"eval_samples_per_second": 36.238, |
|
"eval_steps_per_second": 3.031, |
|
"eval_wer": 0.17556053811659192, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"learning_rate": 6.666666666666667e-08, |
|
"loss": 0.2981, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"eval_loss": 0.7286972403526306, |
|
"eval_runtime": 78.6838, |
|
"eval_samples_per_second": 35.865, |
|
"eval_steps_per_second": 2.999, |
|
"eval_wer": 0.17556053811659192, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"step": 10000, |
|
"total_flos": 2.495144460541511e+19, |
|
"train_loss": 0.6986962738037109, |
|
"train_runtime": 10488.9956, |
|
"train_samples_per_second": 22.881, |
|
"train_steps_per_second": 0.953 |
|
} |
|
], |
|
"max_steps": 10000, |
|
"num_train_epochs": 22, |
|
"total_flos": 2.495144460541511e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|