w2v2-bert-ft-btb-cy / trainer_state.json
DewiBrynJones's picture
End of training
35c8369 verified
raw
history blame contribute delete
No virus
8.62 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 300,
"global_step": 7070,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4243281471004243,
"eval_loss": 5.990268707275391,
"eval_runtime": 138.1214,
"eval_samples_per_second": 40.949,
"eval_steps_per_second": 5.119,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 0.7072135785007072,
"grad_norm": 2.022184371948242,
"learning_rate": 4.9500000000000004e-05,
"loss": 7.061,
"step": 500
},
{
"epoch": 0.8486562942008486,
"eval_loss": 3.0451483726501465,
"eval_runtime": 135.2034,
"eval_samples_per_second": 41.833,
"eval_steps_per_second": 5.229,
"eval_wer": 1.0,
"step": 600
},
{
"epoch": 1.272984441301273,
"eval_loss": 2.9642043113708496,
"eval_runtime": 136.8192,
"eval_samples_per_second": 41.339,
"eval_steps_per_second": 5.167,
"eval_wer": 1.0,
"step": 900
},
{
"epoch": 1.4144271570014144,
"grad_norm": 0.7561541795730591,
"learning_rate": 4.624048706240488e-05,
"loss": 3.0081,
"step": 1000
},
{
"epoch": 1.6973125884016973,
"eval_loss": 2.956415891647339,
"eval_runtime": 136.6446,
"eval_samples_per_second": 41.392,
"eval_steps_per_second": 5.174,
"eval_wer": 1.0,
"step": 1200
},
{
"epoch": 2.1216407355021216,
"grad_norm": 0.3668934106826782,
"learning_rate": 4.245053272450533e-05,
"loss": 2.9733,
"step": 1500
},
{
"epoch": 2.1216407355021216,
"eval_loss": 2.947998285293579,
"eval_runtime": 136.4831,
"eval_samples_per_second": 41.441,
"eval_steps_per_second": 5.18,
"eval_wer": 1.0,
"step": 1500
},
{
"epoch": 2.545968882602546,
"eval_loss": 2.945077657699585,
"eval_runtime": 136.3573,
"eval_samples_per_second": 41.479,
"eval_steps_per_second": 5.185,
"eval_wer": 1.0,
"step": 1800
},
{
"epoch": 2.828854314002829,
"grad_norm": 1.441468596458435,
"learning_rate": 3.866057838660579e-05,
"loss": 2.9454,
"step": 2000
},
{
"epoch": 2.9702970297029703,
"eval_loss": 2.9147346019744873,
"eval_runtime": 135.861,
"eval_samples_per_second": 41.631,
"eval_steps_per_second": 5.204,
"eval_wer": 1.0,
"step": 2100
},
{
"epoch": 3.3946251768033946,
"eval_loss": 2.9019417762756348,
"eval_runtime": 136.0133,
"eval_samples_per_second": 41.584,
"eval_steps_per_second": 5.198,
"eval_wer": 1.0,
"step": 2400
},
{
"epoch": 3.536067892503536,
"grad_norm": 0.46694883704185486,
"learning_rate": 3.487062404870624e-05,
"loss": 2.9064,
"step": 2500
},
{
"epoch": 3.818953323903819,
"eval_loss": 2.884958505630493,
"eval_runtime": 136.0891,
"eval_samples_per_second": 41.561,
"eval_steps_per_second": 5.195,
"eval_wer": 1.0,
"step": 2700
},
{
"epoch": 4.243281471004243,
"grad_norm": 0.4952280819416046,
"learning_rate": 3.10882800608828e-05,
"loss": 2.9048,
"step": 3000
},
{
"epoch": 4.243281471004243,
"eval_loss": 2.8812334537506104,
"eval_runtime": 136.4568,
"eval_samples_per_second": 41.449,
"eval_steps_per_second": 5.181,
"eval_wer": 1.0,
"step": 3000
},
{
"epoch": 4.667609618104668,
"eval_loss": 2.884371042251587,
"eval_runtime": 136.787,
"eval_samples_per_second": 41.349,
"eval_steps_per_second": 5.169,
"eval_wer": 1.0,
"step": 3300
},
{
"epoch": 4.9504950495049505,
"grad_norm": 0.8865047097206116,
"learning_rate": 2.7290715372907157e-05,
"loss": 2.8965,
"step": 3500
},
{
"epoch": 5.091937765205092,
"eval_loss": 2.9125277996063232,
"eval_runtime": 136.3564,
"eval_samples_per_second": 41.48,
"eval_steps_per_second": 5.185,
"eval_wer": 1.0,
"step": 3600
},
{
"epoch": 5.516265912305516,
"eval_loss": 2.898144006729126,
"eval_runtime": 136.0768,
"eval_samples_per_second": 41.565,
"eval_steps_per_second": 5.196,
"eval_wer": 1.0,
"step": 3900
},
{
"epoch": 5.657708628005658,
"grad_norm": 0.3529145121574402,
"learning_rate": 2.3508371385083716e-05,
"loss": 2.9261,
"step": 4000
},
{
"epoch": 5.9405940594059405,
"eval_loss": 2.905318260192871,
"eval_runtime": 136.6781,
"eval_samples_per_second": 41.382,
"eval_steps_per_second": 5.173,
"eval_wer": 1.0,
"step": 4200
},
{
"epoch": 6.364922206506365,
"grad_norm": 0.22229251265525818,
"learning_rate": 1.971841704718417e-05,
"loss": 2.9273,
"step": 4500
},
{
"epoch": 6.364922206506365,
"eval_loss": 2.916677951812744,
"eval_runtime": 136.7502,
"eval_samples_per_second": 41.36,
"eval_steps_per_second": 5.17,
"eval_wer": 1.0,
"step": 4500
},
{
"epoch": 6.789250353606789,
"eval_loss": 2.911259651184082,
"eval_runtime": 136.484,
"eval_samples_per_second": 41.441,
"eval_steps_per_second": 5.18,
"eval_wer": 1.0,
"step": 4800
},
{
"epoch": 7.072135785007072,
"grad_norm": 1.7586228847503662,
"learning_rate": 1.592846270928463e-05,
"loss": 2.9302,
"step": 5000
},
{
"epoch": 7.2135785007072135,
"eval_loss": 2.9133317470550537,
"eval_runtime": 135.9523,
"eval_samples_per_second": 41.603,
"eval_steps_per_second": 5.2,
"eval_wer": 1.0,
"step": 5100
},
{
"epoch": 7.637906647807638,
"eval_loss": 2.921302080154419,
"eval_runtime": 136.5228,
"eval_samples_per_second": 41.429,
"eval_steps_per_second": 5.179,
"eval_wer": 1.0,
"step": 5400
},
{
"epoch": 7.779349363507779,
"grad_norm": 0.6302638649940491,
"learning_rate": 1.2146118721461187e-05,
"loss": 2.9397,
"step": 5500
},
{
"epoch": 8.062234794908063,
"eval_loss": 2.9251174926757812,
"eval_runtime": 136.4335,
"eval_samples_per_second": 41.456,
"eval_steps_per_second": 5.182,
"eval_wer": 1.0,
"step": 5700
},
{
"epoch": 8.486562942008486,
"grad_norm": 0.5835816860198975,
"learning_rate": 8.340943683409437e-06,
"loss": 2.937,
"step": 6000
},
{
"epoch": 8.486562942008486,
"eval_loss": 2.921030282974243,
"eval_runtime": 136.1229,
"eval_samples_per_second": 41.551,
"eval_steps_per_second": 5.194,
"eval_wer": 1.0,
"step": 6000
},
{
"epoch": 8.910891089108912,
"eval_loss": 2.92145037651062,
"eval_runtime": 137.1094,
"eval_samples_per_second": 41.252,
"eval_steps_per_second": 5.156,
"eval_wer": 1.0,
"step": 6300
},
{
"epoch": 9.193776520509195,
"grad_norm": 0.7211419939994812,
"learning_rate": 4.558599695585997e-06,
"loss": 2.9406,
"step": 6500
},
{
"epoch": 9.335219236209335,
"eval_loss": 2.917142629623413,
"eval_runtime": 136.4229,
"eval_samples_per_second": 41.459,
"eval_steps_per_second": 5.182,
"eval_wer": 1.0,
"step": 6600
},
{
"epoch": 9.75954738330976,
"eval_loss": 2.9176828861236572,
"eval_runtime": 137.2177,
"eval_samples_per_second": 41.219,
"eval_steps_per_second": 5.152,
"eval_wer": 1.0,
"step": 6900
},
{
"epoch": 9.900990099009901,
"grad_norm": 0.0,
"learning_rate": 7.762557077625571e-07,
"loss": 2.9378,
"step": 7000
},
{
"epoch": 10.0,
"step": 7070,
"total_flos": 2.4662883830172946e+19,
"train_loss": 3.228043903960534,
"train_runtime": 12366.081,
"train_samples_per_second": 18.293,
"train_steps_per_second": 0.572
}
],
"logging_steps": 500,
"max_steps": 7070,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 600,
"total_flos": 2.4662883830172946e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}