{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 40500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.85,
      "learning_rate": 4.9800000000000004e-05,
      "loss": 4.6222,
      "step": 1500
    },
    {
      "epoch": 1.85,
      "eval_loss": 5.947904586791992,
      "eval_runtime": 35.3873,
      "eval_samples_per_second": 21.505,
      "eval_steps_per_second": 1.356,
      "eval_wer": 0.5473815461346634,
      "step": 1500
    },
    {
      "epoch": 3.7,
      "learning_rate": 4.8084615384615386e-05,
      "loss": 1.1362,
      "step": 3000
    },
    {
      "epoch": 3.7,
      "eval_loss": 7.979872226715088,
      "eval_runtime": 34.3508,
      "eval_samples_per_second": 22.154,
      "eval_steps_per_second": 1.397,
      "eval_wer": 0.509440684004275,
      "step": 3000
    },
    {
      "epoch": 5.56,
      "learning_rate": 4.616153846153846e-05,
      "loss": 0.7814,
      "step": 4500
    },
    {
      "epoch": 5.56,
      "eval_loss": 5.032960891723633,
      "eval_runtime": 33.9113,
      "eval_samples_per_second": 22.441,
      "eval_steps_per_second": 1.415,
      "eval_wer": 0.47239045244032773,
      "step": 4500
    },
    {
      "epoch": 7.41,
      "learning_rate": 4.423974358974359e-05,
      "loss": 0.6281,
      "step": 6000
    },
    {
      "epoch": 7.41,
      "eval_loss": 2.3483684062957764,
      "eval_runtime": 35.8392,
      "eval_samples_per_second": 21.234,
      "eval_steps_per_second": 1.339,
      "eval_wer": 0.5019593872461703,
      "step": 6000
    },
    {
      "epoch": 9.26,
      "learning_rate": 4.2316666666666674e-05,
      "loss": 0.5472,
      "step": 7500
    },
    {
      "epoch": 9.26,
      "eval_loss": 2.249516487121582,
      "eval_runtime": 35.6948,
      "eval_samples_per_second": 21.32,
      "eval_steps_per_second": 1.345,
      "eval_wer": 0.47933737085856787,
      "step": 7500
    },
    {
      "epoch": 11.11,
      "learning_rate": 4.039358974358974e-05,
      "loss": 0.4827,
      "step": 9000
    },
    {
      "epoch": 11.11,
      "eval_loss": 1.1529797315597534,
      "eval_runtime": 35.6048,
      "eval_samples_per_second": 21.373,
      "eval_steps_per_second": 1.348,
      "eval_wer": 0.47684360527253294,
      "step": 9000
    },
    {
      "epoch": 12.96,
      "learning_rate": 3.847051282051282e-05,
      "loss": 0.4327,
      "step": 10500
    },
    {
      "epoch": 12.96,
      "eval_loss": 1.6159653663635254,
      "eval_runtime": 34.1129,
      "eval_samples_per_second": 22.308,
      "eval_steps_per_second": 1.407,
      "eval_wer": 0.4645529034556466,
      "step": 10500
    },
    {
      "epoch": 14.81,
      "learning_rate": 3.6548717948717956e-05,
      "loss": 0.3989,
      "step": 12000
    },
    {
      "epoch": 14.81,
      "eval_loss": 3.263315439224243,
      "eval_runtime": 34.7016,
      "eval_samples_per_second": 21.93,
      "eval_steps_per_second": 1.383,
      "eval_wer": 0.47025293908086924,
      "step": 12000
    },
    {
      "epoch": 16.67,
      "learning_rate": 3.4625641025641024e-05,
      "loss": 0.3522,
      "step": 13500
    },
    {
      "epoch": 16.67,
      "eval_loss": 2.2337419986724854,
      "eval_runtime": 35.6982,
      "eval_samples_per_second": 21.318,
      "eval_steps_per_second": 1.345,
      "eval_wer": 0.4707873174207339,
      "step": 13500
    },
    {
      "epoch": 18.52,
      "learning_rate": 3.2702564102564105e-05,
      "loss": 0.3201,
      "step": 15000
    },
    {
      "epoch": 18.52,
      "eval_loss": 3.6878626346588135,
      "eval_runtime": 36.4839,
      "eval_samples_per_second": 20.859,
      "eval_steps_per_second": 1.316,
      "eval_wer": 0.45653722835767724,
      "step": 15000
    },
    {
      "epoch": 20.37,
      "learning_rate": 3.078076923076923e-05,
      "loss": 0.2899,
      "step": 16500
    },
    {
      "epoch": 20.37,
      "eval_loss": 5.438948631286621,
      "eval_runtime": 34.4996,
      "eval_samples_per_second": 22.058,
      "eval_steps_per_second": 1.391,
      "eval_wer": 0.45992162451015317,
      "step": 16500
    },
    {
      "epoch": 22.22,
      "learning_rate": 2.885897435897436e-05,
      "loss": 0.2776,
      "step": 18000
    },
    {
      "epoch": 22.22,
      "eval_loss": 3.528372049331665,
      "eval_runtime": 35.9097,
      "eval_samples_per_second": 21.192,
      "eval_steps_per_second": 1.337,
      "eval_wer": 0.4536872105450659,
      "step": 18000
    },
    {
      "epoch": 24.07,
      "learning_rate": 2.6935897435897438e-05,
      "loss": 0.2574,
      "step": 19500
    },
    {
      "epoch": 24.07,
      "eval_loss": 2.1759419441223145,
      "eval_runtime": 34.313,
      "eval_samples_per_second": 22.178,
      "eval_steps_per_second": 1.399,
      "eval_wer": 0.464909155682223,
      "step": 19500
    },
    {
      "epoch": 25.93,
      "learning_rate": 2.5012820512820513e-05,
      "loss": 0.2378,
      "step": 21000
    },
    {
      "epoch": 25.93,
      "eval_loss": 3.390052080154419,
      "eval_runtime": 34.4471,
      "eval_samples_per_second": 22.092,
      "eval_steps_per_second": 1.393,
      "eval_wer": 0.4447809048806555,
      "step": 21000
    },
    {
      "epoch": 27.78,
      "learning_rate": 2.3092307692307694e-05,
      "loss": 0.217,
      "step": 22500
    },
    {
      "epoch": 27.78,
      "eval_loss": 1.163241982460022,
      "eval_runtime": 36.0254,
      "eval_samples_per_second": 21.124,
      "eval_steps_per_second": 1.332,
      "eval_wer": 0.45653722835767724,
      "step": 22500
    },
    {
      "epoch": 29.63,
      "learning_rate": 2.1169230769230768e-05,
      "loss": 0.2115,
      "step": 24000
    },
    {
      "epoch": 29.63,
      "eval_loss": 1.7441022396087646,
      "eval_runtime": 35.1297,
      "eval_samples_per_second": 21.663,
      "eval_steps_per_second": 1.366,
      "eval_wer": 0.42322764517278233,
      "step": 24000
    },
    {
      "epoch": 31.48,
      "learning_rate": 1.9246153846153846e-05,
      "loss": 0.1959,
      "step": 25500
    },
    {
      "epoch": 31.48,
      "eval_loss": 3.4991888999938965,
      "eval_runtime": 36.7374,
      "eval_samples_per_second": 20.715,
      "eval_steps_per_second": 1.307,
      "eval_wer": 0.4303526897043107,
      "step": 25500
    },
    {
      "epoch": 33.33,
      "learning_rate": 1.7323076923076924e-05,
      "loss": 0.187,
      "step": 27000
    },
    {
      "epoch": 33.33,
      "eval_loss": 3.6162784099578857,
      "eval_runtime": 34.8093,
      "eval_samples_per_second": 21.862,
      "eval_steps_per_second": 1.379,
      "eval_wer": 0.43694335589597433,
      "step": 27000
    },
    {
      "epoch": 35.19,
      "learning_rate": 1.540128205128205e-05,
      "loss": 0.1748,
      "step": 28500
    },
    {
      "epoch": 35.19,
      "eval_loss": 3.603774309158325,
      "eval_runtime": 35.9258,
      "eval_samples_per_second": 21.183,
      "eval_steps_per_second": 1.336,
      "eval_wer": 0.4467402921268258,
      "step": 28500
    },
    {
      "epoch": 37.04,
      "learning_rate": 1.347820512820513e-05,
      "loss": 0.17,
      "step": 30000
    },
    {
      "epoch": 37.04,
      "eval_loss": 2.970829486846924,
      "eval_runtime": 35.2981,
      "eval_samples_per_second": 21.559,
      "eval_steps_per_second": 1.36,
      "eval_wer": 0.43623085144282153,
      "step": 30000
    },
    {
      "epoch": 38.89,
      "learning_rate": 1.1557692307692308e-05,
      "loss": 0.159,
      "step": 31500
    },
    {
      "epoch": 38.89,
      "eval_loss": 3.2044625282287598,
      "eval_runtime": 34.6143,
      "eval_samples_per_second": 21.985,
      "eval_steps_per_second": 1.387,
      "eval_wer": 0.42785892411827575,
      "step": 31500
    },
    {
      "epoch": 40.74,
      "learning_rate": 9.635897435897436e-06,
      "loss": 0.153,
      "step": 33000
    },
    {
      "epoch": 40.74,
      "eval_loss": 3.2426888942718506,
      "eval_runtime": 35.08,
      "eval_samples_per_second": 21.693,
      "eval_steps_per_second": 1.368,
      "eval_wer": 0.42874955468471676,
      "step": 33000
    },
    {
      "epoch": 42.59,
      "learning_rate": 7.712820512820514e-06,
      "loss": 0.1463,
      "step": 34500
    },
    {
      "epoch": 42.59,
      "eval_loss": 3.5439305305480957,
      "eval_runtime": 36.6846,
      "eval_samples_per_second": 20.744,
      "eval_steps_per_second": 1.308,
      "eval_wer": 0.4269682935518347,
      "step": 34500
    },
    {
      "epoch": 44.44,
      "learning_rate": 5.78974358974359e-06,
      "loss": 0.139,
      "step": 36000
    },
    {
      "epoch": 44.44,
      "eval_loss": 3.938081741333008,
      "eval_runtime": 34.7219,
      "eval_samples_per_second": 21.917,
      "eval_steps_per_second": 1.382,
      "eval_wer": 0.41503384396152476,
      "step": 36000
    },
    {
      "epoch": 46.3,
      "learning_rate": 3.867948717948718e-06,
      "loss": 0.1352,
      "step": 37500
    },
    {
      "epoch": 46.3,
      "eval_loss": 4.174356937408447,
      "eval_runtime": 36.3496,
      "eval_samples_per_second": 20.936,
      "eval_steps_per_second": 1.321,
      "eval_wer": 0.4091556822230139,
      "step": 37500
    },
    {
      "epoch": 48.15,
      "learning_rate": 1.9461538461538464e-06,
      "loss": 0.1369,
      "step": 39000
    },
    {
      "epoch": 48.15,
      "eval_loss": 4.227924823760986,
      "eval_runtime": 34.5902,
      "eval_samples_per_second": 22.0,
      "eval_steps_per_second": 1.388,
      "eval_wer": 0.4153900961881012,
      "step": 39000
    },
    {
      "epoch": 50.0,
      "learning_rate": 2.3076923076923076e-08,
      "loss": 0.1273,
      "step": 40500
    },
    {
      "epoch": 50.0,
      "eval_loss": 4.169058322906494,
      "eval_runtime": 34.2804,
      "eval_samples_per_second": 22.199,
      "eval_steps_per_second": 1.4,
      "eval_wer": 0.4132525828286427,
      "step": 40500
    },
    {
      "epoch": 50.0,
      "step": 40500,
      "total_flos": 5.1003805267852526e+20,
      "train_loss": 0.0,
      "train_runtime": 69.3888,
      "train_samples_per_second": 3734.751,
      "train_steps_per_second": 116.733
    }
  ],
  "max_steps": 8100,
  "num_train_epochs": 10,
  "total_flos": 5.1003805267852526e+20,
  "trial_name": null,
  "trial_params": null
}