|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.997489539748955, |
|
"global_step": 2980, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9000000000000005e-05, |
|
"loss": 5.3579, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.900000000000001e-05, |
|
"loss": 3.2697, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.000149, |
|
"loss": 2.4711, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.000199, |
|
"loss": 2.1202, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.000249, |
|
"loss": 2.0846, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.1640619039535522, |
|
"eval_runtime": 414.2546, |
|
"eval_samples_per_second": 25.076, |
|
"eval_steps_per_second": 3.136, |
|
"eval_wer": 0.8072035074088462, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.000299, |
|
"loss": 2.1202, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00034899999999999997, |
|
"loss": 2.1037, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.00039900000000000005, |
|
"loss": 2.0882, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.000449, |
|
"loss": 2.126, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.000499, |
|
"loss": 2.1201, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 1.1776171922683716, |
|
"eval_runtime": 424.2371, |
|
"eval_samples_per_second": 24.486, |
|
"eval_steps_per_second": 3.062, |
|
"eval_wer": 0.8329171060177221, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.000549, |
|
"loss": 2.1435, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.000599, |
|
"loss": 2.152, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0006490000000000001, |
|
"loss": 2.11, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.000699, |
|
"loss": 2.1503, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.000749, |
|
"loss": 2.1972, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_loss": 1.2631869316101074, |
|
"eval_runtime": 410.677, |
|
"eval_samples_per_second": 25.295, |
|
"eval_steps_per_second": 3.163, |
|
"eval_wer": 0.8723754555376732, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.000799, |
|
"loss": 2.1788, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.000849, |
|
"loss": 2.1774, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.0008990000000000001, |
|
"loss": 2.2205, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.000949, |
|
"loss": 2.2466, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.000999, |
|
"loss": 2.2643, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"eval_loss": 1.372324824333191, |
|
"eval_runtime": 411.6031, |
|
"eval_samples_per_second": 25.238, |
|
"eval_steps_per_second": 3.156, |
|
"eval_wer": 0.8982740440645984, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0009000000000000001, |
|
"loss": 2.2996, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.0007979591836734694, |
|
"loss": 2.2775, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.0006959183673469388, |
|
"loss": 2.2478, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.0005938775510204082, |
|
"loss": 2.2288, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.0004918367346938776, |
|
"loss": 2.1649, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_loss": 1.25503671169281, |
|
"eval_runtime": 416.24, |
|
"eval_samples_per_second": 24.957, |
|
"eval_steps_per_second": 3.121, |
|
"eval_wer": 0.884196311301034, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 0.000389795918367347, |
|
"loss": 2.1054, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 0.00028775510204081633, |
|
"loss": 2.0872, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 0.00018571428571428572, |
|
"loss": 1.9953, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 8.36734693877551e-05, |
|
"loss": 1.9474, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2980, |
|
"total_flos": 1.006981652948512e+20, |
|
"train_loss": 2.3014913469352978, |
|
"train_runtime": 21335.6515, |
|
"train_samples_per_second": 17.909, |
|
"train_steps_per_second": 0.14 |
|
} |
|
], |
|
"max_steps": 2980, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.006981652948512e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|