{
"best_metric": 23.35348393254852,
"best_model_checkpoint": "whisper3/checkpoint-240",
"epoch": 8.333333333333334,
"eval_steps": 10,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1388888888888889,
"grad_norm": 46.06148147583008,
"learning_rate": 1.0000000000000002e-06,
"loss": 3.9402,
"step": 5
},
{
"epoch": 0.2777777777777778,
"grad_norm": 43.4765625,
"learning_rate": 2.0000000000000003e-06,
"loss": 3.8281,
"step": 10
},
{
"epoch": 0.2777777777777778,
"eval_loss": 3.7929115295410156,
"eval_runtime": 253.0403,
"eval_samples_per_second": 1.976,
"eval_steps_per_second": 0.249,
"eval_wer": 80.40089086859689,
"step": 10
},
{
"epoch": 0.4166666666666667,
"grad_norm": 40.57815933227539,
"learning_rate": 3e-06,
"loss": 3.5929,
"step": 15
},
{
"epoch": 0.5555555555555556,
"grad_norm": 39.72583770751953,
"learning_rate": 4.000000000000001e-06,
"loss": 3.209,
"step": 20
},
{
"epoch": 0.5555555555555556,
"eval_loss": 3.0014312267303467,
"eval_runtime": 246.2101,
"eval_samples_per_second": 2.031,
"eval_steps_per_second": 0.256,
"eval_wer": 68.37416481069042,
"step": 20
},
{
"epoch": 0.6944444444444444,
"grad_norm": 39.53627395629883,
"learning_rate": 5e-06,
"loss": 2.7486,
"step": 25
},
{
"epoch": 0.8333333333333334,
"grad_norm": 30.079750061035156,
"learning_rate": 6e-06,
"loss": 2.1066,
"step": 30
},
{
"epoch": 0.8333333333333334,
"eval_loss": 1.761271595954895,
"eval_runtime": 245.5315,
"eval_samples_per_second": 2.036,
"eval_steps_per_second": 0.257,
"eval_wer": 63.91982182628062,
"step": 30
},
{
"epoch": 0.9722222222222222,
"grad_norm": 19.831071853637695,
"learning_rate": 7.000000000000001e-06,
"loss": 1.5134,
"step": 35
},
{
"epoch": 1.1111111111111112,
"grad_norm": 9.755999565124512,
"learning_rate": 8.000000000000001e-06,
"loss": 0.9963,
"step": 40
},
{
"epoch": 1.1111111111111112,
"eval_loss": 0.8740884065628052,
"eval_runtime": 246.6146,
"eval_samples_per_second": 2.027,
"eval_steps_per_second": 0.255,
"eval_wer": 52.43398027362392,
"step": 40
},
{
"epoch": 1.25,
"grad_norm": 6.842897891998291,
"learning_rate": 9e-06,
"loss": 0.786,
"step": 45
},
{
"epoch": 1.3888888888888888,
"grad_norm": 5.720729351043701,
"learning_rate": 1e-05,
"loss": 0.6922,
"step": 50
},
{
"epoch": 1.3888888888888888,
"eval_loss": 0.7008740901947021,
"eval_runtime": 245.5713,
"eval_samples_per_second": 2.036,
"eval_steps_per_second": 0.257,
"eval_wer": 35.82564428889596,
"step": 50
},
{
"epoch": 1.5277777777777777,
"grad_norm": 4.806775093078613,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.6427,
"step": 55
},
{
"epoch": 1.6666666666666665,
"grad_norm": 5.128376483917236,
"learning_rate": 1.2e-05,
"loss": 0.5816,
"step": 60
},
{
"epoch": 1.6666666666666665,
"eval_loss": 0.6238442659378052,
"eval_runtime": 245.679,
"eval_samples_per_second": 2.035,
"eval_steps_per_second": 0.256,
"eval_wer": 31.148584155265667,
"step": 60
},
{
"epoch": 1.8055555555555556,
"grad_norm": 4.993675231933594,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.5805,
"step": 65
},
{
"epoch": 1.9444444444444444,
"grad_norm": 4.856825351715088,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.5684,
"step": 70
},
{
"epoch": 1.9444444444444444,
"eval_loss": 0.5697694420814514,
"eval_runtime": 245.5413,
"eval_samples_per_second": 2.036,
"eval_steps_per_second": 0.257,
"eval_wer": 35.47566019726376,
"step": 70
},
{
"epoch": 2.0833333333333335,
"grad_norm": 4.464582443237305,
"learning_rate": 1.5e-05,
"loss": 0.4534,
"step": 75
},
{
"epoch": 2.2222222222222223,
"grad_norm": 4.251033306121826,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.427,
"step": 80
},
{
"epoch": 2.2222222222222223,
"eval_loss": 0.5380394458770752,
"eval_runtime": 244.4819,
"eval_samples_per_second": 2.045,
"eval_steps_per_second": 0.258,
"eval_wer": 27.266942411708563,
"step": 80
},
{
"epoch": 2.361111111111111,
"grad_norm": 4.489510536193848,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.3929,
"step": 85
},
{
"epoch": 2.5,
"grad_norm": 4.552371025085449,
"learning_rate": 1.8e-05,
"loss": 0.4395,
"step": 90
},
{
"epoch": 2.5,
"eval_loss": 0.5162410140037537,
"eval_runtime": 245.2373,
"eval_samples_per_second": 2.039,
"eval_steps_per_second": 0.257,
"eval_wer": 32.73942093541203,
"step": 90
},
{
"epoch": 2.638888888888889,
"grad_norm": 4.691618919372559,
"learning_rate": 1.9e-05,
"loss": 0.3825,
"step": 95
},
{
"epoch": 2.7777777777777777,
"grad_norm": 4.219367027282715,
"learning_rate": 2e-05,
"loss": 0.3861,
"step": 100
},
{
"epoch": 2.7777777777777777,
"eval_loss": 0.495292991399765,
"eval_runtime": 243.4193,
"eval_samples_per_second": 2.054,
"eval_steps_per_second": 0.259,
"eval_wer": 24.530703149856826,
"step": 100
},
{
"epoch": 2.9166666666666665,
"grad_norm": 4.323045253753662,
"learning_rate": 2.1e-05,
"loss": 0.3669,
"step": 105
},
{
"epoch": 3.0555555555555554,
"grad_norm": 3.2159509658813477,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.3745,
"step": 110
},
{
"epoch": 3.0555555555555554,
"eval_loss": 0.4837464392185211,
"eval_runtime": 244.5759,
"eval_samples_per_second": 2.044,
"eval_steps_per_second": 0.258,
"eval_wer": 24.626153356665608,
"step": 110
},
{
"epoch": 3.1944444444444446,
"grad_norm": 3.675457000732422,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.257,
"step": 115
},
{
"epoch": 3.3333333333333335,
"grad_norm": 2.8939876556396484,
"learning_rate": 2.4e-05,
"loss": 0.2487,
"step": 120
},
{
"epoch": 3.3333333333333335,
"eval_loss": 0.4732927978038788,
"eval_runtime": 244.6891,
"eval_samples_per_second": 2.043,
"eval_steps_per_second": 0.257,
"eval_wer": 23.57620108176901,
"step": 120
},
{
"epoch": 3.4722222222222223,
"grad_norm": 3.4589827060699463,
"learning_rate": 2.5e-05,
"loss": 0.253,
"step": 125
},
{
"epoch": 3.611111111111111,
"grad_norm": 3.1798577308654785,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.2343,
"step": 130
},
{
"epoch": 3.611111111111111,
"eval_loss": 0.46519017219543457,
"eval_runtime": 244.3925,
"eval_samples_per_second": 2.046,
"eval_steps_per_second": 0.258,
"eval_wer": 24.94432071269488,
"step": 130
},
{
"epoch": 3.75,
"grad_norm": 4.061887741088867,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.2354,
"step": 135
},
{
"epoch": 3.888888888888889,
"grad_norm": 4.474591255187988,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.2429,
"step": 140
},
{
"epoch": 3.888888888888889,
"eval_loss": 0.4581267833709717,
"eval_runtime": 244.836,
"eval_samples_per_second": 2.042,
"eval_steps_per_second": 0.257,
"eval_wer": 24.085268851415844,
"step": 140
},
{
"epoch": 4.027777777777778,
"grad_norm": 2.3235318660736084,
"learning_rate": 2.9e-05,
"loss": 0.2728,
"step": 145
},
{
"epoch": 4.166666666666667,
"grad_norm": 2.3824808597564697,
"learning_rate": 3e-05,
"loss": 0.1286,
"step": 150
},
{
"epoch": 4.166666666666667,
"eval_loss": 0.46725359559059143,
"eval_runtime": 245.6982,
"eval_samples_per_second": 2.035,
"eval_steps_per_second": 0.256,
"eval_wer": 24.276169265033406,
"step": 150
},
{
"epoch": 4.305555555555555,
"grad_norm": 2.5686404705047607,
"learning_rate": 3.1e-05,
"loss": 0.1301,
"step": 155
},
{
"epoch": 4.444444444444445,
"grad_norm": 2.7436068058013916,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.1304,
"step": 160
},
{
"epoch": 4.444444444444445,
"eval_loss": 0.46984970569610596,
"eval_runtime": 245.0991,
"eval_samples_per_second": 2.04,
"eval_steps_per_second": 0.257,
"eval_wer": 31.72128539611836,
"step": 160
},
{
"epoch": 4.583333333333333,
"grad_norm": 2.83823823928833,
"learning_rate": 3.3e-05,
"loss": 0.1408,
"step": 165
},
{
"epoch": 4.722222222222222,
"grad_norm": 2.7204811573028564,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.1361,
"step": 170
},
{
"epoch": 4.722222222222222,
"eval_loss": 0.4690161943435669,
"eval_runtime": 246.5232,
"eval_samples_per_second": 2.028,
"eval_steps_per_second": 0.256,
"eval_wer": 33.08940502704423,
"step": 170
},
{
"epoch": 4.861111111111111,
"grad_norm": 3.671097993850708,
"learning_rate": 3.5e-05,
"loss": 0.1511,
"step": 175
},
{
"epoch": 5.0,
"grad_norm": 6.484060764312744,
"learning_rate": 3.6e-05,
"loss": 0.1447,
"step": 180
},
{
"epoch": 5.0,
"eval_loss": 0.4811546802520752,
"eval_runtime": 244.9356,
"eval_samples_per_second": 2.041,
"eval_steps_per_second": 0.257,
"eval_wer": 24.657970092268535,
"step": 180
},
{
"epoch": 5.138888888888889,
"grad_norm": 1.9667352437973022,
"learning_rate": 3.7e-05,
"loss": 0.063,
"step": 185
},
{
"epoch": 5.277777777777778,
"grad_norm": 2.1828482151031494,
"learning_rate": 3.8e-05,
"loss": 0.0617,
"step": 190
},
{
"epoch": 5.277777777777778,
"eval_loss": 0.48713362216949463,
"eval_runtime": 244.9851,
"eval_samples_per_second": 2.041,
"eval_steps_per_second": 0.257,
"eval_wer": 29.939548202354437,
"step": 190
},
{
"epoch": 5.416666666666667,
"grad_norm": 1.8774911165237427,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.0606,
"step": 195
},
{
"epoch": 5.555555555555555,
"grad_norm": 1.8562583923339844,
"learning_rate": 4e-05,
"loss": 0.0617,
"step": 200
},
{
"epoch": 5.555555555555555,
"eval_loss": 0.488438218832016,
"eval_runtime": 244.9014,
"eval_samples_per_second": 2.042,
"eval_steps_per_second": 0.257,
"eval_wer": 24.848870505886094,
"step": 200
},
{
"epoch": 5.694444444444445,
"grad_norm": 1.9106348752975464,
"learning_rate": 4.1e-05,
"loss": 0.0617,
"step": 205
},
{
"epoch": 5.833333333333333,
"grad_norm": 1.8114972114562988,
"learning_rate": 4.2e-05,
"loss": 0.0577,
"step": 210
},
{
"epoch": 5.833333333333333,
"eval_loss": 0.4998014569282532,
"eval_runtime": 244.1029,
"eval_samples_per_second": 2.048,
"eval_steps_per_second": 0.258,
"eval_wer": 26.853324848870507,
"step": 210
},
{
"epoch": 5.972222222222222,
"grad_norm": 5.00437593460083,
"learning_rate": 4.3e-05,
"loss": 0.078,
"step": 215
},
{
"epoch": 6.111111111111111,
"grad_norm": 1.4013047218322754,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.038,
"step": 220
},
{
"epoch": 6.111111111111111,
"eval_loss": 0.500673770904541,
"eval_runtime": 247.5538,
"eval_samples_per_second": 2.02,
"eval_steps_per_second": 0.254,
"eval_wer": 24.848870505886094,
"step": 220
},
{
"epoch": 6.25,
"grad_norm": 1.4778488874435425,
"learning_rate": 4.5e-05,
"loss": 0.0243,
"step": 225
},
{
"epoch": 6.388888888888889,
"grad_norm": 1.3681198358535767,
"learning_rate": 4.600000000000001e-05,
"loss": 0.0269,
"step": 230
},
{
"epoch": 6.388888888888889,
"eval_loss": 0.5122880935668945,
"eval_runtime": 243.6648,
"eval_samples_per_second": 2.052,
"eval_steps_per_second": 0.259,
"eval_wer": 27.139675469296847,
"step": 230
},
{
"epoch": 6.527777777777778,
"grad_norm": 1.450726866722107,
"learning_rate": 4.7e-05,
"loss": 0.0297,
"step": 235
},
{
"epoch": 6.666666666666667,
"grad_norm": 1.4052125215530396,
"learning_rate": 4.8e-05,
"loss": 0.0321,
"step": 240
},
{
"epoch": 6.666666666666667,
"eval_loss": 0.500522792339325,
"eval_runtime": 247.602,
"eval_samples_per_second": 2.019,
"eval_steps_per_second": 0.254,
"eval_wer": 23.35348393254852,
"step": 240
},
{
"epoch": 6.805555555555555,
"grad_norm": 1.2223644256591797,
"learning_rate": 4.9e-05,
"loss": 0.0291,
"step": 245
},
{
"epoch": 6.944444444444445,
"grad_norm": 1.463398814201355,
"learning_rate": 5e-05,
"loss": 0.0296,
"step": 250
},
{
"epoch": 6.944444444444445,
"eval_loss": 0.5332342386245728,
"eval_runtime": 246.3422,
"eval_samples_per_second": 2.03,
"eval_steps_per_second": 0.256,
"eval_wer": 31.880369074132993,
"step": 250
},
{
"epoch": 7.083333333333333,
"grad_norm": 4.257472991943359,
"learning_rate": 5.1000000000000006e-05,
"loss": 0.027,
"step": 255
},
{
"epoch": 7.222222222222222,
"grad_norm": 2.294562339782715,
"learning_rate": 5.2000000000000004e-05,
"loss": 0.0207,
"step": 260
},
{
"epoch": 7.222222222222222,
"eval_loss": 0.5236981511116028,
"eval_runtime": 244.1894,
"eval_samples_per_second": 2.048,
"eval_steps_per_second": 0.258,
"eval_wer": 30.066815144766146,
"step": 260
},
{
"epoch": 7.361111111111111,
"grad_norm": 1.2468712329864502,
"learning_rate": 5.300000000000001e-05,
"loss": 0.0228,
"step": 265
},
{
"epoch": 7.5,
"grad_norm": 1.8487240076065063,
"learning_rate": 5.4000000000000005e-05,
"loss": 0.0215,
"step": 270
},
{
"epoch": 7.5,
"eval_loss": 0.5222529768943787,
"eval_runtime": 243.6778,
"eval_samples_per_second": 2.052,
"eval_steps_per_second": 0.259,
"eval_wer": 25.548838689150493,
"step": 270
},
{
"epoch": 7.638888888888889,
"grad_norm": 1.1909741163253784,
"learning_rate": 5.500000000000001e-05,
"loss": 0.0201,
"step": 275
},
{
"epoch": 7.777777777777778,
"grad_norm": 1.6141778230667114,
"learning_rate": 5.6000000000000006e-05,
"loss": 0.0198,
"step": 280
},
{
"epoch": 7.777777777777778,
"eval_loss": 0.5157026648521423,
"eval_runtime": 244.0734,
"eval_samples_per_second": 2.049,
"eval_steps_per_second": 0.258,
"eval_wer": 30.194082087177854,
"step": 280
},
{
"epoch": 7.916666666666667,
"grad_norm": 1.1372332572937012,
"learning_rate": 5.6999999999999996e-05,
"loss": 0.0193,
"step": 285
},
{
"epoch": 8.055555555555555,
"grad_norm": 2.210016965866089,
"learning_rate": 5.8e-05,
"loss": 0.0273,
"step": 290
},
{
"epoch": 8.055555555555555,
"eval_loss": 0.5289562940597534,
"eval_runtime": 243.9152,
"eval_samples_per_second": 2.05,
"eval_steps_per_second": 0.258,
"eval_wer": 27.553293032134903,
"step": 290
},
{
"epoch": 8.194444444444445,
"grad_norm": 1.942575454711914,
"learning_rate": 5.9e-05,
"loss": 0.0201,
"step": 295
},
{
"epoch": 8.333333333333334,
"grad_norm": 1.3640440702438354,
"learning_rate": 6e-05,
"loss": 0.0197,
"step": 300
},
{
"epoch": 8.333333333333334,
"eval_loss": 0.5509196519851685,
"eval_runtime": 243.9508,
"eval_samples_per_second": 2.05,
"eval_steps_per_second": 0.258,
"eval_wer": 26.948775055679285,
"step": 300
},
{
"epoch": 8.333333333333334,
"step": 300,
"total_flos": 9.2409447186432e+17,
"train_loss": 0.5431244759509961,
"train_runtime": 10016.0212,
"train_samples_per_second": 3.834,
"train_steps_per_second": 0.03
}
],
"logging_steps": 5,
"max_steps": 300,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 10,
"total_flos": 9.2409447186432e+17,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}