wav2vec2-xlsr-53-ft-btb-cy / trainer_state.json
DewiBrynJones's picture
End of training
c024852 verified
raw
history blame
7.87 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.536067892503536,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14144271570014144,
"eval_loss": 4.035430908203125,
"eval_runtime": 153.5895,
"eval_samples_per_second": 36.825,
"eval_steps_per_second": 4.603,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.2828854314002829,
"eval_loss": 3.097731590270996,
"eval_runtime": 149.7934,
"eval_samples_per_second": 37.759,
"eval_steps_per_second": 4.72,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.4243281471004243,
"eval_loss": 3.0768725872039795,
"eval_runtime": 151.0662,
"eval_samples_per_second": 37.441,
"eval_steps_per_second": 4.68,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 0.5657708628005658,
"eval_loss": 1.373844027519226,
"eval_runtime": 150.8632,
"eval_samples_per_second": 37.491,
"eval_steps_per_second": 4.686,
"eval_wer": 0.8913622648777043,
"step": 400
},
{
"epoch": 0.7072135785007072,
"grad_norm": 2.2486917972564697,
"learning_rate": 0.0002964,
"loss": 3.7586,
"step": 500
},
{
"epoch": 0.7072135785007072,
"eval_loss": 1.0915191173553467,
"eval_runtime": 151.7555,
"eval_samples_per_second": 37.27,
"eval_steps_per_second": 4.659,
"eval_wer": 0.7692431148488156,
"step": 500
},
{
"epoch": 0.8486562942008486,
"eval_loss": 0.9360549449920654,
"eval_runtime": 152.9408,
"eval_samples_per_second": 36.982,
"eval_steps_per_second": 4.623,
"eval_wer": 0.6854818000898761,
"step": 600
},
{
"epoch": 0.9900990099009901,
"eval_loss": 0.8494995832443237,
"eval_runtime": 151.5832,
"eval_samples_per_second": 37.313,
"eval_steps_per_second": 4.664,
"eval_wer": 0.6247191371894459,
"step": 700
},
{
"epoch": 1.1315417256011315,
"eval_loss": 0.6885886788368225,
"eval_runtime": 151.913,
"eval_samples_per_second": 37.232,
"eval_steps_per_second": 4.654,
"eval_wer": 0.5397059767606086,
"step": 800
},
{
"epoch": 1.272984441301273,
"eval_loss": 0.6703779101371765,
"eval_runtime": 153.2384,
"eval_samples_per_second": 36.91,
"eval_steps_per_second": 4.614,
"eval_wer": 0.531215895230147,
"step": 900
},
{
"epoch": 1.4144271570014144,
"grad_norm": 0.8719882369041443,
"learning_rate": 0.0002259,
"loss": 0.8877,
"step": 1000
},
{
"epoch": 1.4144271570014144,
"eval_loss": 0.62369304895401,
"eval_runtime": 153.3779,
"eval_samples_per_second": 36.876,
"eval_steps_per_second": 4.61,
"eval_wer": 0.495040765230789,
"step": 1000
},
{
"epoch": 1.5558698727015559,
"eval_loss": 0.5992260575294495,
"eval_runtime": 152.5492,
"eval_samples_per_second": 37.077,
"eval_steps_per_second": 4.635,
"eval_wer": 0.4767606085895872,
"step": 1100
},
{
"epoch": 1.6973125884016973,
"eval_loss": 0.5729629397392273,
"eval_runtime": 152.035,
"eval_samples_per_second": 37.202,
"eval_steps_per_second": 4.65,
"eval_wer": 0.4521730756885151,
"step": 1200
},
{
"epoch": 1.8387553041018387,
"eval_loss": 0.5504249930381775,
"eval_runtime": 152.762,
"eval_samples_per_second": 37.025,
"eval_steps_per_second": 4.628,
"eval_wer": 0.4417891763497464,
"step": 1300
},
{
"epoch": 1.9801980198019802,
"eval_loss": 0.5288310050964355,
"eval_runtime": 152.1876,
"eval_samples_per_second": 37.165,
"eval_steps_per_second": 4.646,
"eval_wer": 0.42586826731719846,
"step": 1400
},
{
"epoch": 2.1216407355021216,
"grad_norm": 0.7933566570281982,
"learning_rate": 0.00015134999999999997,
"loss": 0.6844,
"step": 1500
},
{
"epoch": 2.1216407355021216,
"eval_loss": 0.5165240168571472,
"eval_runtime": 152.8573,
"eval_samples_per_second": 37.002,
"eval_steps_per_second": 4.625,
"eval_wer": 0.4216633498106182,
"step": 1500
},
{
"epoch": 2.263083451202263,
"eval_loss": 0.5071681141853333,
"eval_runtime": 152.7102,
"eval_samples_per_second": 37.037,
"eval_steps_per_second": 4.63,
"eval_wer": 0.41930410220196446,
"step": 1600
},
{
"epoch": 2.4045261669024045,
"eval_loss": 0.49842193722724915,
"eval_runtime": 153.4952,
"eval_samples_per_second": 36.848,
"eval_steps_per_second": 4.606,
"eval_wer": 0.41546831867496953,
"step": 1700
},
{
"epoch": 2.545968882602546,
"eval_loss": 0.4882013499736786,
"eval_runtime": 152.4926,
"eval_samples_per_second": 37.09,
"eval_steps_per_second": 4.636,
"eval_wer": 0.4096905694292868,
"step": 1800
},
{
"epoch": 2.6874115983026874,
"eval_loss": 0.4803565740585327,
"eval_runtime": 152.0827,
"eval_samples_per_second": 37.19,
"eval_steps_per_second": 4.649,
"eval_wer": 0.40800539256596263,
"step": 1900
},
{
"epoch": 2.828854314002829,
"grad_norm": 0.7038583159446716,
"learning_rate": 7.68e-05,
"loss": 0.537,
"step": 2000
},
{
"epoch": 2.828854314002829,
"eval_loss": 0.470017671585083,
"eval_runtime": 152.5003,
"eval_samples_per_second": 37.088,
"eval_steps_per_second": 4.636,
"eval_wer": 0.39272645567182385,
"step": 2000
},
{
"epoch": 2.9702970297029703,
"eval_loss": 0.46773043274879456,
"eval_runtime": 152.3208,
"eval_samples_per_second": 37.132,
"eval_steps_per_second": 4.642,
"eval_wer": 0.3884573409514027,
"step": 2100
},
{
"epoch": 3.1117397454031117,
"eval_loss": 0.4683248996734619,
"eval_runtime": 152.8021,
"eval_samples_per_second": 37.015,
"eval_steps_per_second": 4.627,
"eval_wer": 0.38572895936316365,
"step": 2200
},
{
"epoch": 3.253182461103253,
"eval_loss": 0.4618384838104248,
"eval_runtime": 152.4829,
"eval_samples_per_second": 37.093,
"eval_steps_per_second": 4.637,
"eval_wer": 0.3792289914617706,
"step": 2300
},
{
"epoch": 3.3946251768033946,
"eval_loss": 0.46039119362831116,
"eval_runtime": 152.4542,
"eval_samples_per_second": 37.1,
"eval_steps_per_second": 4.637,
"eval_wer": 0.3762759196250883,
"step": 2400
},
{
"epoch": 3.536067892503536,
"grad_norm": 0.6644078493118286,
"learning_rate": 2.1e-06,
"loss": 0.4434,
"step": 2500
},
{
"epoch": 3.536067892503536,
"eval_loss": 0.4588949382305145,
"eval_runtime": 152.8982,
"eval_samples_per_second": 36.992,
"eval_steps_per_second": 4.624,
"eval_wer": 0.3742697566925595,
"step": 2500
},
{
"epoch": 3.536067892503536,
"step": 2500,
"total_flos": 9.55169606524761e+18,
"train_loss": 1.2622116821289062,
"train_runtime": 6522.8506,
"train_samples_per_second": 12.265,
"train_steps_per_second": 0.383
}
],
"logging_steps": 500,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 400,
"total_flos": 9.55169606524761e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}