|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 75.0, |
|
"eval_steps": 100, |
|
"global_step": 2100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.5e-05, |
|
"loss": 8.1262, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 4.0765790939331055, |
|
"eval_runtime": 21.8341, |
|
"eval_samples_per_second": 18.091, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5989, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 3.226550340652466, |
|
"eval_runtime": 21.5714, |
|
"eval_samples_per_second": 18.311, |
|
"eval_steps_per_second": 0.603, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 7.5e-05, |
|
"loss": 3.0472, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"eval_loss": 2.8889501094818115, |
|
"eval_runtime": 21.7358, |
|
"eval_samples_per_second": 18.173, |
|
"eval_steps_per_second": 0.598, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7327, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_loss": 0.6693879961967468, |
|
"eval_runtime": 22.0147, |
|
"eval_samples_per_second": 17.943, |
|
"eval_steps_per_second": 0.591, |
|
"eval_wer": 0.6622969506982046, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 0.000125, |
|
"loss": 0.4239, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"eval_loss": 0.4327137768268585, |
|
"eval_runtime": 21.6438, |
|
"eval_samples_per_second": 18.25, |
|
"eval_steps_per_second": 0.601, |
|
"eval_wer": 0.5483043602165859, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 0.00015, |
|
"loss": 0.2337, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"eval_loss": 0.38920775055885315, |
|
"eval_runtime": 21.8228, |
|
"eval_samples_per_second": 18.1, |
|
"eval_steps_per_second": 0.596, |
|
"eval_wer": 0.5195212311199772, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.000175, |
|
"loss": 0.1724, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.391787052154541, |
|
"eval_runtime": 21.5976, |
|
"eval_samples_per_second": 18.289, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.5027073240239385, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1392, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"eval_loss": 0.36863410472869873, |
|
"eval_runtime": 21.5216, |
|
"eval_samples_per_second": 18.354, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.4670846394984326, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 32.14, |
|
"learning_rate": 0.00022500000000000002, |
|
"loss": 0.1196, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 32.14, |
|
"eval_loss": 0.3849872052669525, |
|
"eval_runtime": 21.7525, |
|
"eval_samples_per_second": 18.159, |
|
"eval_steps_per_second": 0.598, |
|
"eval_wer": 0.45397549159304645, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"learning_rate": 0.00025, |
|
"loss": 0.1095, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"eval_loss": 0.3586702346801758, |
|
"eval_runtime": 21.8949, |
|
"eval_samples_per_second": 18.041, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 0.4516956397834141, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 39.29, |
|
"learning_rate": 0.000275, |
|
"loss": 0.1026, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 39.29, |
|
"eval_loss": 0.36062636971473694, |
|
"eval_runtime": 21.8096, |
|
"eval_samples_per_second": 18.111, |
|
"eval_steps_per_second": 0.596, |
|
"eval_wer": 0.44029638073525224, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0972, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"eval_loss": 0.40814533829689026, |
|
"eval_runtime": 21.9118, |
|
"eval_samples_per_second": 18.027, |
|
"eval_steps_per_second": 0.593, |
|
"eval_wer": 0.43317184383015106, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 46.43, |
|
"learning_rate": 0.00032500000000000004, |
|
"loss": 0.0932, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 46.43, |
|
"eval_loss": 0.3904629945755005, |
|
"eval_runtime": 21.7505, |
|
"eval_samples_per_second": 18.16, |
|
"eval_steps_per_second": 0.598, |
|
"eval_wer": 0.426902251353662, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.00035, |
|
"loss": 0.0897, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 0.37547701597213745, |
|
"eval_runtime": 22.1102, |
|
"eval_samples_per_second": 17.865, |
|
"eval_steps_per_second": 0.588, |
|
"eval_wer": 0.42747221430607013, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 53.57, |
|
"learning_rate": 0.000375, |
|
"loss": 0.0846, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 53.57, |
|
"eval_loss": 0.36823779344558716, |
|
"eval_runtime": 21.896, |
|
"eval_samples_per_second": 18.04, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 0.42091764035337703, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"learning_rate": 0.0004, |
|
"loss": 0.0854, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"eval_loss": 0.3795730471611023, |
|
"eval_runtime": 21.5795, |
|
"eval_samples_per_second": 18.304, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.41635793673411226, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 60.71, |
|
"learning_rate": 0.000425, |
|
"loss": 0.0845, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 60.71, |
|
"eval_loss": 0.35857513546943665, |
|
"eval_runtime": 21.7949, |
|
"eval_samples_per_second": 18.124, |
|
"eval_steps_per_second": 0.596, |
|
"eval_wer": 0.3941293815901966, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 64.29, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 0.0854, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 64.29, |
|
"eval_loss": 0.376005083322525, |
|
"eval_runtime": 21.5391, |
|
"eval_samples_per_second": 18.339, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.41664291821031635, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 67.86, |
|
"learning_rate": 0.000475, |
|
"loss": 0.0846, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 67.86, |
|
"eval_loss": 0.3710671663284302, |
|
"eval_runtime": 21.5817, |
|
"eval_samples_per_second": 18.303, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.41208321459105157, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0827, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"eval_loss": 0.40105244517326355, |
|
"eval_runtime": 21.6021, |
|
"eval_samples_per_second": 18.285, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.4160729552579082, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0665, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 0.3478299379348755, |
|
"eval_runtime": 21.8418, |
|
"eval_samples_per_second": 18.085, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 0.36534625249358793, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"step": 2100, |
|
"total_flos": 5.419723804611084e+19, |
|
"train_loss": 0.8885498528253465, |
|
"train_runtime": 26470.8294, |
|
"train_samples_per_second": 10.064, |
|
"train_steps_per_second": 0.079 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2100, |
|
"num_train_epochs": 75, |
|
"save_steps": 100, |
|
"total_flos": 5.419723804611084e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|