13048909972's picture
End of training
79e73cd
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 1635,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.92,
"eval_loss": 3.585611343383789,
"eval_runtime": 69.1996,
"eval_samples_per_second": 23.801,
"eval_steps_per_second": 2.977,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 1.83,
"eval_loss": 3.0358903408050537,
"eval_runtime": 70.0289,
"eval_samples_per_second": 23.519,
"eval_steps_per_second": 2.942,
"eval_wer": 0.9998978653865795,
"step": 200
},
{
"epoch": 2.75,
"eval_loss": 0.9341107606887817,
"eval_runtime": 69.5975,
"eval_samples_per_second": 23.665,
"eval_steps_per_second": 2.96,
"eval_wer": 0.7737718312736186,
"step": 300
},
{
"epoch": 3.67,
"eval_loss": 0.5874470472335815,
"eval_runtime": 69.8546,
"eval_samples_per_second": 23.578,
"eval_steps_per_second": 2.949,
"eval_wer": 0.6386477377183127,
"step": 400
},
{
"epoch": 4.59,
"learning_rate": 0.0002988,
"loss": 3.214,
"step": 500
},
{
"epoch": 4.59,
"eval_loss": 0.5316070914268494,
"eval_runtime": 69.5825,
"eval_samples_per_second": 23.67,
"eval_steps_per_second": 2.961,
"eval_wer": 0.5525482586048411,
"step": 500
},
{
"epoch": 5.5,
"eval_loss": 0.45860984921455383,
"eval_runtime": 68.7814,
"eval_samples_per_second": 23.945,
"eval_steps_per_second": 2.995,
"eval_wer": 0.519252374629762,
"step": 600
},
{
"epoch": 6.42,
"eval_loss": 0.45034125447273254,
"eval_runtime": 67.077,
"eval_samples_per_second": 24.554,
"eval_steps_per_second": 3.071,
"eval_wer": 0.4605249719129813,
"step": 700
},
{
"epoch": 7.34,
"eval_loss": 0.41716983914375305,
"eval_runtime": 68.1176,
"eval_samples_per_second": 24.179,
"eval_steps_per_second": 3.024,
"eval_wer": 0.4390767030946788,
"step": 800
},
{
"epoch": 8.26,
"eval_loss": 0.4093872606754303,
"eval_runtime": 69.7848,
"eval_samples_per_second": 23.601,
"eval_steps_per_second": 2.952,
"eval_wer": 0.41854764579716064,
"step": 900
},
{
"epoch": 9.17,
"learning_rate": 0.00016837004405286342,
"loss": 0.2284,
"step": 1000
},
{
"epoch": 9.17,
"eval_loss": 0.4091317355632782,
"eval_runtime": 70.1477,
"eval_samples_per_second": 23.479,
"eval_steps_per_second": 2.937,
"eval_wer": 0.39985701154121134,
"step": 1000
},
{
"epoch": 10.09,
"eval_loss": 0.39890098571777344,
"eval_runtime": 69.3262,
"eval_samples_per_second": 23.757,
"eval_steps_per_second": 2.971,
"eval_wer": 0.40404453069145135,
"step": 1100
},
{
"epoch": 11.01,
"eval_loss": 0.4066716432571411,
"eval_runtime": 69.7662,
"eval_samples_per_second": 23.607,
"eval_steps_per_second": 2.953,
"eval_wer": 0.3930139924420386,
"step": 1200
},
{
"epoch": 11.93,
"eval_loss": 0.40022560954093933,
"eval_runtime": 67.6908,
"eval_samples_per_second": 24.331,
"eval_steps_per_second": 3.043,
"eval_wer": 0.3775916658155449,
"step": 1300
},
{
"epoch": 12.84,
"eval_loss": 0.3982270061969757,
"eval_runtime": 66.5115,
"eval_samples_per_second": 24.763,
"eval_steps_per_second": 3.097,
"eval_wer": 0.36492697375140437,
"step": 1400
},
{
"epoch": 13.76,
"learning_rate": 3.621145374449339e-05,
"loss": 0.1029,
"step": 1500
},
{
"epoch": 13.76,
"eval_loss": 0.38969308137893677,
"eval_runtime": 65.75,
"eval_samples_per_second": 25.049,
"eval_steps_per_second": 3.133,
"eval_wer": 0.3703401082626902,
"step": 1500
},
{
"epoch": 14.68,
"eval_loss": 0.3836365342140198,
"eval_runtime": 65.8183,
"eval_samples_per_second": 25.023,
"eval_steps_per_second": 3.13,
"eval_wer": 0.3566540700643448,
"step": 1600
},
{
"epoch": 15.0,
"step": 1635,
"total_flos": 6.429866751096261e+18,
"train_loss": 1.0907053682053125,
"train_runtime": 3864.236,
"train_samples_per_second": 13.501,
"train_steps_per_second": 0.423
}
],
"max_steps": 1635,
"num_train_epochs": 15,
"total_flos": 6.429866751096261e+18,
"trial_name": null,
"trial_params": null
}