|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 40, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.5e-05, |
|
"loss": 19.0933, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 32.55470657348633, |
|
"eval_runtime": 5.3015, |
|
"eval_samples_per_second": 24.144, |
|
"eval_steps_per_second": 0.377, |
|
"eval_wer": 1.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.125e-05, |
|
"loss": 18.6757, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 31.040802001953125, |
|
"eval_runtime": 5.2939, |
|
"eval_samples_per_second": 24.179, |
|
"eval_steps_per_second": 0.378, |
|
"eval_wer": 1.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 6.937499999999999e-05, |
|
"loss": 17.1649, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 29.57763671875, |
|
"eval_runtime": 5.2818, |
|
"eval_samples_per_second": 24.234, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 1.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.5625e-05, |
|
"loss": 14.7415, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 22.918315887451172, |
|
"eval_runtime": 5.4291, |
|
"eval_samples_per_second": 23.577, |
|
"eval_steps_per_second": 0.368, |
|
"eval_wer": 1.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 6.187499999999999e-05, |
|
"loss": 11.8071, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 17.507305145263672, |
|
"eval_runtime": 5.3146, |
|
"eval_samples_per_second": 24.085, |
|
"eval_steps_per_second": 0.376, |
|
"eval_wer": 1.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 5.8124999999999997e-05, |
|
"loss": 9.7675, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 14.17426872253418, |
|
"eval_runtime": 5.4054, |
|
"eval_samples_per_second": 23.68, |
|
"eval_steps_per_second": 0.37, |
|
"eval_wer": 1.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 5.4374999999999994e-05, |
|
"loss": 8.4193, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 12.122542381286621, |
|
"eval_runtime": 5.2782, |
|
"eval_samples_per_second": 24.251, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 1.0, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 5.0625e-05, |
|
"loss": 7.4746, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 10.744585037231445, |
|
"eval_runtime": 5.3374, |
|
"eval_samples_per_second": 23.982, |
|
"eval_steps_per_second": 0.375, |
|
"eval_wer": 1.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.6874999999999994e-05, |
|
"loss": 6.8442, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 9.794867515563965, |
|
"eval_runtime": 5.2577, |
|
"eval_samples_per_second": 24.345, |
|
"eval_steps_per_second": 0.38, |
|
"eval_wer": 1.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.312499999999999e-05, |
|
"loss": 6.3765, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 9.114768981933594, |
|
"eval_runtime": 5.2827, |
|
"eval_samples_per_second": 24.23, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 1.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9374999999999995e-05, |
|
"loss": 6.0321, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 8.59341812133789, |
|
"eval_runtime": 5.2171, |
|
"eval_samples_per_second": 24.535, |
|
"eval_steps_per_second": 0.383, |
|
"eval_wer": 1.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.5625e-05, |
|
"loss": 5.7783, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 8.198293685913086, |
|
"eval_runtime": 5.3204, |
|
"eval_samples_per_second": 24.058, |
|
"eval_steps_per_second": 0.376, |
|
"eval_wer": 1.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.1874999999999996e-05, |
|
"loss": 5.5827, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 7.877962112426758, |
|
"eval_runtime": 5.2188, |
|
"eval_samples_per_second": 24.527, |
|
"eval_steps_per_second": 0.383, |
|
"eval_wer": 1.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2.8125e-05, |
|
"loss": 5.4249, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 7.628803730010986, |
|
"eval_runtime": 5.2811, |
|
"eval_samples_per_second": 24.238, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 1.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.4375e-05, |
|
"loss": 5.3088, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 7.427917003631592, |
|
"eval_runtime": 5.236, |
|
"eval_samples_per_second": 24.446, |
|
"eval_steps_per_second": 0.382, |
|
"eval_wer": 1.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.0625e-05, |
|
"loss": 5.2078, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 7.268764972686768, |
|
"eval_runtime": 5.2941, |
|
"eval_samples_per_second": 24.178, |
|
"eval_steps_per_second": 0.378, |
|
"eval_wer": 1.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.6875e-05, |
|
"loss": 5.1289, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 7.145933628082275, |
|
"eval_runtime": 5.2685, |
|
"eval_samples_per_second": 24.295, |
|
"eval_steps_per_second": 0.38, |
|
"eval_wer": 1.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1.3124999999999999e-05, |
|
"loss": 5.0697, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 7.052780628204346, |
|
"eval_runtime": 5.263, |
|
"eval_samples_per_second": 24.321, |
|
"eval_steps_per_second": 0.38, |
|
"eval_wer": 1.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 9.375e-06, |
|
"loss": 5.0227, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 6.983470916748047, |
|
"eval_runtime": 5.2829, |
|
"eval_samples_per_second": 24.229, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 1.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 5.6249999999999995e-06, |
|
"loss": 4.9853, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 6.937458515167236, |
|
"eval_runtime": 5.3165, |
|
"eval_samples_per_second": 24.076, |
|
"eval_steps_per_second": 0.376, |
|
"eval_wer": 1.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 40, |
|
"total_flos": 5.430583918308557e+17, |
|
"train_loss": 8.69529299736023, |
|
"train_runtime": 243.8197, |
|
"train_samples_per_second": 10.5, |
|
"train_steps_per_second": 0.164 |
|
} |
|
], |
|
"max_steps": 40, |
|
"num_train_epochs": 20, |
|
"total_flos": 5.430583918308557e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|