addy88's picture
add tokenizer
27caf1a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 24.137931034482758,
"global_step": 3500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.45,
"learning_rate": 4.97e-05,
"loss": 3.8466,
"step": 500
},
{
"epoch": 3.45,
"eval_loss": 2.4666473865509033,
"eval_runtime": 66.0325,
"eval_samples_per_second": 25.442,
"eval_steps_per_second": 3.18,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 6.9,
"learning_rate": 9.970000000000001e-05,
"loss": 0.9403,
"step": 1000
},
{
"epoch": 6.9,
"eval_loss": 0.4411664605140686,
"eval_runtime": 66.1692,
"eval_samples_per_second": 25.389,
"eval_steps_per_second": 3.174,
"eval_wer": 0.39928330232237613,
"step": 1000
},
{
"epoch": 10.34,
"learning_rate": 8.516417910447761e-05,
"loss": 0.2824,
"step": 1500
},
{
"epoch": 10.34,
"eval_loss": 0.3629843592643738,
"eval_runtime": 65.8604,
"eval_samples_per_second": 25.508,
"eval_steps_per_second": 3.189,
"eval_wer": 0.3318172420922059,
"step": 1500
},
{
"epoch": 13.79,
"learning_rate": 7.023880597014926e-05,
"loss": 0.1639,
"step": 2000
},
{
"epoch": 13.79,
"eval_loss": 0.3536781072616577,
"eval_runtime": 65.6765,
"eval_samples_per_second": 25.58,
"eval_steps_per_second": 3.197,
"eval_wer": 0.3030115085107849,
"step": 2000
},
{
"epoch": 17.24,
"learning_rate": 5.53134328358209e-05,
"loss": 0.1251,
"step": 2500
},
{
"epoch": 17.24,
"eval_loss": 0.39447325468063354,
"eval_runtime": 66.1834,
"eval_samples_per_second": 25.384,
"eval_steps_per_second": 3.173,
"eval_wer": 0.2973606229756736,
"step": 2500
},
{
"epoch": 20.69,
"learning_rate": 4.0388059701492536e-05,
"loss": 0.1075,
"step": 3000
},
{
"epoch": 20.69,
"eval_loss": 0.3765473961830139,
"eval_runtime": 66.4767,
"eval_samples_per_second": 25.272,
"eval_steps_per_second": 3.159,
"eval_wer": 0.29370822134932123,
"step": 3000
},
{
"epoch": 24.14,
"learning_rate": 2.546268656716418e-05,
"loss": 0.0831,
"step": 3500
},
{
"epoch": 24.14,
"eval_loss": 0.3624289631843567,
"eval_runtime": 66.6948,
"eval_samples_per_second": 25.189,
"eval_steps_per_second": 3.149,
"eval_wer": 0.2816484046585349,
"step": 3500
}
],
"max_steps": 4350,
"num_train_epochs": 30,
"total_flos": 3.2107549243060224e+18,
"trial_name": null,
"trial_params": null
}