xls-asr-vi-40h-1B / trainer_state.json
geninhu's picture
End of training
5ffaf78
raw
history blame
10.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 50.0,
"global_step": 40500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.85,
"learning_rate": 4.9800000000000004e-05,
"loss": 4.6222,
"step": 1500
},
{
"epoch": 1.85,
"eval_loss": 5.947904586791992,
"eval_runtime": 35.3873,
"eval_samples_per_second": 21.505,
"eval_steps_per_second": 1.356,
"eval_wer": 0.5473815461346634,
"step": 1500
},
{
"epoch": 3.7,
"learning_rate": 4.8084615384615386e-05,
"loss": 1.1362,
"step": 3000
},
{
"epoch": 3.7,
"eval_loss": 7.979872226715088,
"eval_runtime": 34.3508,
"eval_samples_per_second": 22.154,
"eval_steps_per_second": 1.397,
"eval_wer": 0.509440684004275,
"step": 3000
},
{
"epoch": 5.56,
"learning_rate": 4.616153846153846e-05,
"loss": 0.7814,
"step": 4500
},
{
"epoch": 5.56,
"eval_loss": 5.032960891723633,
"eval_runtime": 33.9113,
"eval_samples_per_second": 22.441,
"eval_steps_per_second": 1.415,
"eval_wer": 0.47239045244032773,
"step": 4500
},
{
"epoch": 7.41,
"learning_rate": 4.423974358974359e-05,
"loss": 0.6281,
"step": 6000
},
{
"epoch": 7.41,
"eval_loss": 2.3483684062957764,
"eval_runtime": 35.8392,
"eval_samples_per_second": 21.234,
"eval_steps_per_second": 1.339,
"eval_wer": 0.5019593872461703,
"step": 6000
},
{
"epoch": 9.26,
"learning_rate": 4.2316666666666674e-05,
"loss": 0.5472,
"step": 7500
},
{
"epoch": 9.26,
"eval_loss": 2.249516487121582,
"eval_runtime": 35.6948,
"eval_samples_per_second": 21.32,
"eval_steps_per_second": 1.345,
"eval_wer": 0.47933737085856787,
"step": 7500
},
{
"epoch": 11.11,
"learning_rate": 4.039358974358974e-05,
"loss": 0.4827,
"step": 9000
},
{
"epoch": 11.11,
"eval_loss": 1.1529797315597534,
"eval_runtime": 35.6048,
"eval_samples_per_second": 21.373,
"eval_steps_per_second": 1.348,
"eval_wer": 0.47684360527253294,
"step": 9000
},
{
"epoch": 12.96,
"learning_rate": 3.847051282051282e-05,
"loss": 0.4327,
"step": 10500
},
{
"epoch": 12.96,
"eval_loss": 1.6159653663635254,
"eval_runtime": 34.1129,
"eval_samples_per_second": 22.308,
"eval_steps_per_second": 1.407,
"eval_wer": 0.4645529034556466,
"step": 10500
},
{
"epoch": 14.81,
"learning_rate": 3.6548717948717956e-05,
"loss": 0.3989,
"step": 12000
},
{
"epoch": 14.81,
"eval_loss": 3.263315439224243,
"eval_runtime": 34.7016,
"eval_samples_per_second": 21.93,
"eval_steps_per_second": 1.383,
"eval_wer": 0.47025293908086924,
"step": 12000
},
{
"epoch": 16.67,
"learning_rate": 3.4625641025641024e-05,
"loss": 0.3522,
"step": 13500
},
{
"epoch": 16.67,
"eval_loss": 2.2337419986724854,
"eval_runtime": 35.6982,
"eval_samples_per_second": 21.318,
"eval_steps_per_second": 1.345,
"eval_wer": 0.4707873174207339,
"step": 13500
},
{
"epoch": 18.52,
"learning_rate": 3.2702564102564105e-05,
"loss": 0.3201,
"step": 15000
},
{
"epoch": 18.52,
"eval_loss": 3.6878626346588135,
"eval_runtime": 36.4839,
"eval_samples_per_second": 20.859,
"eval_steps_per_second": 1.316,
"eval_wer": 0.45653722835767724,
"step": 15000
},
{
"epoch": 20.37,
"learning_rate": 3.078076923076923e-05,
"loss": 0.2899,
"step": 16500
},
{
"epoch": 20.37,
"eval_loss": 5.438948631286621,
"eval_runtime": 34.4996,
"eval_samples_per_second": 22.058,
"eval_steps_per_second": 1.391,
"eval_wer": 0.45992162451015317,
"step": 16500
},
{
"epoch": 22.22,
"learning_rate": 2.885897435897436e-05,
"loss": 0.2776,
"step": 18000
},
{
"epoch": 22.22,
"eval_loss": 3.528372049331665,
"eval_runtime": 35.9097,
"eval_samples_per_second": 21.192,
"eval_steps_per_second": 1.337,
"eval_wer": 0.4536872105450659,
"step": 18000
},
{
"epoch": 24.07,
"learning_rate": 2.6935897435897438e-05,
"loss": 0.2574,
"step": 19500
},
{
"epoch": 24.07,
"eval_loss": 2.1759419441223145,
"eval_runtime": 34.313,
"eval_samples_per_second": 22.178,
"eval_steps_per_second": 1.399,
"eval_wer": 0.464909155682223,
"step": 19500
},
{
"epoch": 25.93,
"learning_rate": 2.5012820512820513e-05,
"loss": 0.2378,
"step": 21000
},
{
"epoch": 25.93,
"eval_loss": 3.390052080154419,
"eval_runtime": 34.4471,
"eval_samples_per_second": 22.092,
"eval_steps_per_second": 1.393,
"eval_wer": 0.4447809048806555,
"step": 21000
},
{
"epoch": 27.78,
"learning_rate": 2.3092307692307694e-05,
"loss": 0.217,
"step": 22500
},
{
"epoch": 27.78,
"eval_loss": 1.163241982460022,
"eval_runtime": 36.0254,
"eval_samples_per_second": 21.124,
"eval_steps_per_second": 1.332,
"eval_wer": 0.45653722835767724,
"step": 22500
},
{
"epoch": 29.63,
"learning_rate": 2.1169230769230768e-05,
"loss": 0.2115,
"step": 24000
},
{
"epoch": 29.63,
"eval_loss": 1.7441022396087646,
"eval_runtime": 35.1297,
"eval_samples_per_second": 21.663,
"eval_steps_per_second": 1.366,
"eval_wer": 0.42322764517278233,
"step": 24000
},
{
"epoch": 31.48,
"learning_rate": 1.9246153846153846e-05,
"loss": 0.1959,
"step": 25500
},
{
"epoch": 31.48,
"eval_loss": 3.4991888999938965,
"eval_runtime": 36.7374,
"eval_samples_per_second": 20.715,
"eval_steps_per_second": 1.307,
"eval_wer": 0.4303526897043107,
"step": 25500
},
{
"epoch": 33.33,
"learning_rate": 1.7323076923076924e-05,
"loss": 0.187,
"step": 27000
},
{
"epoch": 33.33,
"eval_loss": 3.6162784099578857,
"eval_runtime": 34.8093,
"eval_samples_per_second": 21.862,
"eval_steps_per_second": 1.379,
"eval_wer": 0.43694335589597433,
"step": 27000
},
{
"epoch": 35.19,
"learning_rate": 1.540128205128205e-05,
"loss": 0.1748,
"step": 28500
},
{
"epoch": 35.19,
"eval_loss": 3.603774309158325,
"eval_runtime": 35.9258,
"eval_samples_per_second": 21.183,
"eval_steps_per_second": 1.336,
"eval_wer": 0.4467402921268258,
"step": 28500
},
{
"epoch": 37.04,
"learning_rate": 1.347820512820513e-05,
"loss": 0.17,
"step": 30000
},
{
"epoch": 37.04,
"eval_loss": 2.970829486846924,
"eval_runtime": 35.2981,
"eval_samples_per_second": 21.559,
"eval_steps_per_second": 1.36,
"eval_wer": 0.43623085144282153,
"step": 30000
},
{
"epoch": 38.89,
"learning_rate": 1.1557692307692308e-05,
"loss": 0.159,
"step": 31500
},
{
"epoch": 38.89,
"eval_loss": 3.2044625282287598,
"eval_runtime": 34.6143,
"eval_samples_per_second": 21.985,
"eval_steps_per_second": 1.387,
"eval_wer": 0.42785892411827575,
"step": 31500
},
{
"epoch": 40.74,
"learning_rate": 9.635897435897436e-06,
"loss": 0.153,
"step": 33000
},
{
"epoch": 40.74,
"eval_loss": 3.2426888942718506,
"eval_runtime": 35.08,
"eval_samples_per_second": 21.693,
"eval_steps_per_second": 1.368,
"eval_wer": 0.42874955468471676,
"step": 33000
},
{
"epoch": 42.59,
"learning_rate": 7.712820512820514e-06,
"loss": 0.1463,
"step": 34500
},
{
"epoch": 42.59,
"eval_loss": 3.5439305305480957,
"eval_runtime": 36.6846,
"eval_samples_per_second": 20.744,
"eval_steps_per_second": 1.308,
"eval_wer": 0.4269682935518347,
"step": 34500
},
{
"epoch": 44.44,
"learning_rate": 5.78974358974359e-06,
"loss": 0.139,
"step": 36000
},
{
"epoch": 44.44,
"eval_loss": 3.938081741333008,
"eval_runtime": 34.7219,
"eval_samples_per_second": 21.917,
"eval_steps_per_second": 1.382,
"eval_wer": 0.41503384396152476,
"step": 36000
},
{
"epoch": 46.3,
"learning_rate": 3.867948717948718e-06,
"loss": 0.1352,
"step": 37500
},
{
"epoch": 46.3,
"eval_loss": 4.174356937408447,
"eval_runtime": 36.3496,
"eval_samples_per_second": 20.936,
"eval_steps_per_second": 1.321,
"eval_wer": 0.4091556822230139,
"step": 37500
},
{
"epoch": 48.15,
"learning_rate": 1.9461538461538464e-06,
"loss": 0.1369,
"step": 39000
},
{
"epoch": 48.15,
"eval_loss": 4.227924823760986,
"eval_runtime": 34.5902,
"eval_samples_per_second": 22.0,
"eval_steps_per_second": 1.388,
"eval_wer": 0.4153900961881012,
"step": 39000
},
{
"epoch": 50.0,
"learning_rate": 2.3076923076923076e-08,
"loss": 0.1273,
"step": 40500
},
{
"epoch": 50.0,
"eval_loss": 4.169058322906494,
"eval_runtime": 34.2804,
"eval_samples_per_second": 22.199,
"eval_steps_per_second": 1.4,
"eval_wer": 0.4132525828286427,
"step": 40500
},
{
"epoch": 50.0,
"step": 40500,
"total_flos": 5.1003805267852526e+20,
"train_loss": 0.0,
"train_runtime": 69.3888,
"train_samples_per_second": 3734.751,
"train_steps_per_second": 116.733
}
],
"max_steps": 8100,
"num_train_epochs": 10,
"total_flos": 5.1003805267852526e+20,
"trial_name": null,
"trial_params": null
}