xls-r-300m-ur / trainer_state.json
HarrisDePerceptron's picture
End of training
e853115
raw
history blame
No virus
9.46 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 50.0,
"global_step": 2550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.96,
"learning_rate": 7.358999999999999e-05,
"loss": 9.5036,
"step": 100
},
{
"epoch": 1.96,
"eval_loss": 4.053760528564453,
"eval_runtime": 13.4693,
"eval_samples_per_second": 25.317,
"eval_steps_per_second": 3.192,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 3.92,
"learning_rate": 7.059e-05,
"loss": 3.3669,
"step": 200
},
{
"epoch": 3.92,
"eval_loss": 3.204110622406006,
"eval_runtime": 13.5228,
"eval_samples_per_second": 25.217,
"eval_steps_per_second": 3.18,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 5.88,
"learning_rate": 6.759e-05,
"loss": 3.1499,
"step": 300
},
{
"epoch": 5.88,
"eval_loss": 3.1220219135284424,
"eval_runtime": 13.3359,
"eval_samples_per_second": 25.57,
"eval_steps_per_second": 3.224,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 7.84,
"learning_rate": 6.458999999999999e-05,
"loss": 3.0271,
"step": 400
},
{
"epoch": 7.84,
"eval_loss": 2.9935410022735596,
"eval_runtime": 13.4609,
"eval_samples_per_second": 25.333,
"eval_steps_per_second": 3.194,
"eval_wer": 0.9970479704797048,
"step": 400
},
{
"epoch": 9.8,
"learning_rate": 6.159e-05,
"loss": 2.9565,
"step": 500
},
{
"epoch": 9.8,
"eval_loss": 2.935685157775879,
"eval_runtime": 13.2089,
"eval_samples_per_second": 25.816,
"eval_steps_per_second": 3.255,
"eval_wer": 0.9992619926199262,
"step": 500
},
{
"epoch": 11.76,
"learning_rate": 5.858999999999999e-05,
"loss": 2.9184,
"step": 600
},
{
"epoch": 11.76,
"eval_loss": 2.916475296020508,
"eval_runtime": 13.2506,
"eval_samples_per_second": 25.735,
"eval_steps_per_second": 3.245,
"eval_wer": 0.996309963099631,
"step": 600
},
{
"epoch": 13.73,
"learning_rate": 5.5589999999999995e-05,
"loss": 2.8832,
"step": 700
},
{
"epoch": 13.73,
"eval_loss": 2.8761985301971436,
"eval_runtime": 13.271,
"eval_samples_per_second": 25.695,
"eval_steps_per_second": 3.24,
"eval_wer": 0.9911439114391144,
"step": 700
},
{
"epoch": 15.69,
"learning_rate": 5.2589999999999996e-05,
"loss": 2.8407,
"step": 800
},
{
"epoch": 15.69,
"eval_loss": 2.810206890106201,
"eval_runtime": 13.1264,
"eval_samples_per_second": 25.978,
"eval_steps_per_second": 3.276,
"eval_wer": 0.9970479704797048,
"step": 800
},
{
"epoch": 17.65,
"learning_rate": 4.959e-05,
"loss": 2.7007,
"step": 900
},
{
"epoch": 17.65,
"eval_loss": 2.436384439468384,
"eval_runtime": 13.0662,
"eval_samples_per_second": 26.098,
"eval_steps_per_second": 3.291,
"eval_wer": 0.996309963099631,
"step": 900
},
{
"epoch": 19.61,
"learning_rate": 4.658999999999999e-05,
"loss": 2.4206,
"step": 1000
},
{
"epoch": 19.61,
"eval_loss": 1.985196590423584,
"eval_runtime": 13.5307,
"eval_samples_per_second": 25.202,
"eval_steps_per_second": 3.178,
"eval_wer": 0.9420664206642066,
"step": 1000
},
{
"epoch": 21.57,
"learning_rate": 4.359e-05,
"loss": 2.0699,
"step": 1100
},
{
"epoch": 21.57,
"eval_loss": 1.4849445819854736,
"eval_runtime": 13.2138,
"eval_samples_per_second": 25.806,
"eval_steps_per_second": 3.254,
"eval_wer": 0.8343173431734318,
"step": 1100
},
{
"epoch": 23.53,
"learning_rate": 4.0589999999999996e-05,
"loss": 1.8311,
"step": 1200
},
{
"epoch": 23.53,
"eval_loss": 1.3083971738815308,
"eval_runtime": 13.1253,
"eval_samples_per_second": 25.98,
"eval_steps_per_second": 3.276,
"eval_wer": 0.7800738007380074,
"step": 1200
},
{
"epoch": 25.49,
"learning_rate": 3.759e-05,
"loss": 1.7127,
"step": 1300
},
{
"epoch": 25.49,
"eval_loss": 1.203996181488037,
"eval_runtime": 14.0537,
"eval_samples_per_second": 24.264,
"eval_steps_per_second": 3.06,
"eval_wer": 0.744649446494465,
"step": 1300
},
{
"epoch": 27.45,
"learning_rate": 3.459e-05,
"loss": 1.6239,
"step": 1400
},
{
"epoch": 27.45,
"eval_loss": 1.1359221935272217,
"eval_runtime": 13.3447,
"eval_samples_per_second": 25.553,
"eval_steps_per_second": 3.222,
"eval_wer": 0.7280442804428044,
"step": 1400
},
{
"epoch": 29.41,
"learning_rate": 3.159e-05,
"loss": 1.5654,
"step": 1500
},
{
"epoch": 29.41,
"eval_loss": 1.068825364112854,
"eval_runtime": 13.1322,
"eval_samples_per_second": 25.967,
"eval_steps_per_second": 3.274,
"eval_wer": 0.7158671586715867,
"step": 1500
},
{
"epoch": 31.37,
"learning_rate": 2.8589999999999996e-05,
"loss": 1.4965,
"step": 1600
},
{
"epoch": 31.37,
"eval_loss": 1.052024245262146,
"eval_runtime": 13.0376,
"eval_samples_per_second": 26.155,
"eval_steps_per_second": 3.298,
"eval_wer": 0.6985239852398524,
"step": 1600
},
{
"epoch": 33.33,
"learning_rate": 2.5589999999999997e-05,
"loss": 1.445,
"step": 1700
},
{
"epoch": 33.33,
"eval_loss": 1.0314019918441772,
"eval_runtime": 13.256,
"eval_samples_per_second": 25.724,
"eval_steps_per_second": 3.244,
"eval_wer": 0.6878228782287823,
"step": 1700
},
{
"epoch": 35.29,
"learning_rate": 2.259e-05,
"loss": 1.4095,
"step": 1800
},
{
"epoch": 35.29,
"eval_loss": 1.0063327550888062,
"eval_runtime": 13.1463,
"eval_samples_per_second": 25.939,
"eval_steps_per_second": 3.271,
"eval_wer": 0.6712177121771218,
"step": 1800
},
{
"epoch": 37.25,
"learning_rate": 1.9589999999999997e-05,
"loss": 1.3853,
"step": 1900
},
{
"epoch": 37.25,
"eval_loss": 0.9847874045372009,
"eval_runtime": 13.6657,
"eval_samples_per_second": 24.953,
"eval_steps_per_second": 3.147,
"eval_wer": 0.6701107011070111,
"step": 1900
},
{
"epoch": 39.22,
"learning_rate": 1.659e-05,
"loss": 1.3558,
"step": 2000
},
{
"epoch": 39.22,
"eval_loss": 0.9738264679908752,
"eval_runtime": 13.0206,
"eval_samples_per_second": 26.189,
"eval_steps_per_second": 3.302,
"eval_wer": 0.6730627306273063,
"step": 2000
},
{
"epoch": 41.18,
"learning_rate": 1.3589999999999999e-05,
"loss": 1.3415,
"step": 2100
},
{
"epoch": 41.18,
"eval_loss": 0.9656162858009338,
"eval_runtime": 13.0695,
"eval_samples_per_second": 26.091,
"eval_steps_per_second": 3.29,
"eval_wer": 0.6645756457564576,
"step": 2100
},
{
"epoch": 43.14,
"learning_rate": 1.0589999999999999e-05,
"loss": 1.3102,
"step": 2200
},
{
"epoch": 43.14,
"eval_loss": 0.9631739854812622,
"eval_runtime": 13.1258,
"eval_samples_per_second": 25.979,
"eval_steps_per_second": 3.276,
"eval_wer": 0.655719557195572,
"step": 2200
},
{
"epoch": 45.1,
"learning_rate": 7.589999999999999e-06,
"loss": 1.309,
"step": 2300
},
{
"epoch": 45.1,
"eval_loss": 0.9495635032653809,
"eval_runtime": 12.963,
"eval_samples_per_second": 26.306,
"eval_steps_per_second": 3.317,
"eval_wer": 0.655719557195572,
"step": 2300
},
{
"epoch": 47.06,
"learning_rate": 4.589999999999999e-06,
"loss": 1.2993,
"step": 2400
},
{
"epoch": 47.06,
"eval_loss": 0.960854709148407,
"eval_runtime": 13.0799,
"eval_samples_per_second": 26.071,
"eval_steps_per_second": 3.287,
"eval_wer": 0.6549815498154982,
"step": 2400
},
{
"epoch": 49.02,
"learning_rate": 1.5899999999999998e-06,
"loss": 1.2695,
"step": 2500
},
{
"epoch": 49.02,
"eval_loss": 0.9603787660598755,
"eval_runtime": 13.0779,
"eval_samples_per_second": 26.075,
"eval_steps_per_second": 3.288,
"eval_wer": 0.6542435424354244,
"step": 2500
},
{
"epoch": 50.0,
"step": 2550,
"total_flos": 4.920553620909019e+18,
"train_loss": 2.3068374424354703,
"train_runtime": 2918.903,
"train_samples_per_second": 13.875,
"train_steps_per_second": 0.874
}
],
"max_steps": 2550,
"num_train_epochs": 50,
"total_flos": 4.920553620909019e+18,
"trial_name": null,
"trial_params": null
}