xls-r-hi-test / trainer_state.json
kapilkd13's picture
End of training
11b606c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 27.11864406779661,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.36,
"eval_loss": 1.4595407247543335,
"eval_runtime": 81.6295,
"eval_samples_per_second": 25.665,
"eval_steps_per_second": 3.21,
"eval_wer": 1.0039260592180599,
"step": 400
},
{
"epoch": 1.69,
"learning_rate": 0.0002982,
"loss": 4.7778,
"step": 500
},
{
"epoch": 2.71,
"eval_loss": 0.8082281947135925,
"eval_runtime": 80.8384,
"eval_samples_per_second": 25.916,
"eval_steps_per_second": 3.241,
"eval_wer": 1.0115055346529254,
"step": 800
},
{
"epoch": 3.39,
"learning_rate": 0.00028011999999999997,
"loss": 0.6408,
"step": 1000
},
{
"epoch": 4.07,
"eval_loss": 0.7031667828559875,
"eval_runtime": 84.117,
"eval_samples_per_second": 24.906,
"eval_steps_per_second": 3.115,
"eval_wer": 1.0078521184361198,
"step": 1200
},
{
"epoch": 5.08,
"learning_rate": 0.00026011999999999997,
"loss": 0.3937,
"step": 1500
},
{
"epoch": 5.42,
"eval_loss": 0.6889204382896423,
"eval_runtime": 80.1903,
"eval_samples_per_second": 26.125,
"eval_steps_per_second": 3.267,
"eval_wer": 1.0432957085991603,
"step": 1600
},
{
"epoch": 6.78,
"learning_rate": 0.00024011999999999997,
"loss": 0.3,
"step": 2000
},
{
"epoch": 6.78,
"eval_loss": 0.6820310354232788,
"eval_runtime": 80.2232,
"eval_samples_per_second": 26.115,
"eval_steps_per_second": 3.266,
"eval_wer": 1.0068706036316049,
"step": 2000
},
{
"epoch": 8.14,
"eval_loss": 0.6669920086860657,
"eval_runtime": 79.89,
"eval_samples_per_second": 26.224,
"eval_steps_per_second": 3.28,
"eval_wer": 1.0196302960902994,
"step": 2400
},
{
"epoch": 8.47,
"learning_rate": 0.00022011999999999997,
"loss": 0.226,
"step": 2500
},
{
"epoch": 9.49,
"eval_loss": 0.7215595841407776,
"eval_runtime": 80.0002,
"eval_samples_per_second": 26.187,
"eval_steps_per_second": 3.275,
"eval_wer": 1.0422051365941436,
"step": 2800
},
{
"epoch": 10.17,
"learning_rate": 0.00020012,
"loss": 0.197,
"step": 3000
},
{
"epoch": 10.85,
"eval_loss": 0.7669464945793152,
"eval_runtime": 80.1105,
"eval_samples_per_second": 26.151,
"eval_steps_per_second": 3.27,
"eval_wer": 1.053383499645564,
"step": 3200
},
{
"epoch": 11.86,
"learning_rate": 0.00018012,
"loss": 0.165,
"step": 3500
},
{
"epoch": 12.2,
"eval_loss": 0.7517344951629639,
"eval_runtime": 79.716,
"eval_samples_per_second": 26.281,
"eval_steps_per_second": 3.287,
"eval_wer": 1.0199574676918044,
"step": 3600
},
{
"epoch": 13.56,
"learning_rate": 0.00016011999999999998,
"loss": 0.1486,
"step": 4000
},
{
"epoch": 13.56,
"eval_loss": 0.7124771475791931,
"eval_runtime": 79.8981,
"eval_samples_per_second": 26.221,
"eval_steps_per_second": 3.279,
"eval_wer": 1.0357162331642946,
"step": 4000
},
{
"epoch": 14.92,
"eval_loss": 0.7447456121444702,
"eval_runtime": 82.2103,
"eval_samples_per_second": 25.483,
"eval_steps_per_second": 3.187,
"eval_wer": 1.0347347183597797,
"step": 4400
},
{
"epoch": 15.25,
"learning_rate": 0.00014012,
"loss": 0.122,
"step": 4500
},
{
"epoch": 16.27,
"eval_loss": 0.6899322271347046,
"eval_runtime": 79.6624,
"eval_samples_per_second": 26.298,
"eval_steps_per_second": 3.289,
"eval_wer": 1.0440045804024212,
"step": 4800
},
{
"epoch": 16.95,
"learning_rate": 0.00012011999999999998,
"loss": 0.1069,
"step": 5000
},
{
"epoch": 17.63,
"eval_loss": 0.7212241291999817,
"eval_runtime": 80.3141,
"eval_samples_per_second": 26.085,
"eval_steps_per_second": 3.262,
"eval_wer": 1.0350073613610338,
"step": 5200
},
{
"epoch": 18.64,
"learning_rate": 0.00010011999999999998,
"loss": 0.0961,
"step": 5500
},
{
"epoch": 18.98,
"eval_loss": 0.7417359352111816,
"eval_runtime": 80.0211,
"eval_samples_per_second": 26.181,
"eval_steps_per_second": 3.274,
"eval_wer": 1.0408419215878728,
"step": 5600
},
{
"epoch": 20.34,
"learning_rate": 8.012e-05,
"loss": 0.086,
"step": 6000
},
{
"epoch": 20.34,
"eval_loss": 0.7402010560035706,
"eval_runtime": 80.0522,
"eval_samples_per_second": 26.17,
"eval_steps_per_second": 3.273,
"eval_wer": 1.0355526473635421,
"step": 6000
},
{
"epoch": 21.69,
"eval_loss": 0.7760854959487915,
"eval_runtime": 80.138,
"eval_samples_per_second": 26.142,
"eval_steps_per_second": 3.269,
"eval_wer": 1.0419870221931402,
"step": 6400
},
{
"epoch": 22.03,
"learning_rate": 6.0119999999999994e-05,
"loss": 0.0756,
"step": 6500
},
{
"epoch": 23.05,
"eval_loss": 0.7345677614212036,
"eval_runtime": 80.4841,
"eval_samples_per_second": 26.03,
"eval_steps_per_second": 3.255,
"eval_wer": 1.036915862369813,
"step": 6800
},
{
"epoch": 23.73,
"learning_rate": 4.012e-05,
"loss": 0.0666,
"step": 7000
},
{
"epoch": 24.41,
"eval_loss": 0.7506045699119568,
"eval_runtime": 82.6434,
"eval_samples_per_second": 25.35,
"eval_steps_per_second": 3.17,
"eval_wer": 1.0449315666066852,
"step": 7200
},
{
"epoch": 25.42,
"learning_rate": 2.0119999999999997e-05,
"loss": 0.0595,
"step": 7500
},
{
"epoch": 25.76,
"eval_loss": 0.7319227457046509,
"eval_runtime": 79.8082,
"eval_samples_per_second": 26.25,
"eval_steps_per_second": 3.283,
"eval_wer": 1.0476034680189759,
"step": 7600
},
{
"epoch": 27.12,
"learning_rate": 1.6e-07,
"loss": 0.054,
"step": 8000
},
{
"epoch": 27.12,
"eval_loss": 0.7346429228782654,
"eval_runtime": 79.9162,
"eval_samples_per_second": 26.215,
"eval_steps_per_second": 3.278,
"eval_wer": 1.0478761110202301,
"step": 8000
},
{
"epoch": 27.12,
"step": 8000,
"total_flos": 1.592873144248711e+19,
"train_loss": 0.46973063707351687,
"train_runtime": 10600.4383,
"train_samples_per_second": 12.075,
"train_steps_per_second": 0.755
}
],
"max_steps": 8000,
"num_train_epochs": 28,
"total_flos": 1.592873144248711e+19,
"trial_name": null,
"trial_params": null
}