Fine_tune_large_v2_adam_8bit / trainer_state.json
Daya7624's picture
First commit
2ecc75e
{
"best_metric": 41.11150694691842,
"best_model_checkpoint": "whisper_large_v2_adam_8bit/checkpoint-485",
"epoch": 17.962962962962962,
"global_step": 485,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"learning_rate": 8.000000000000001e-07,
"loss": 1.0874,
"step": 5
},
{
"epoch": 0.19,
"eval_loss": 1.0718352794647217,
"eval_runtime": 305.4443,
"eval_samples_per_second": 0.262,
"eval_steps_per_second": 0.131,
"eval_wer": 54.82721767011044,
"step": 5
},
{
"epoch": 0.37,
"learning_rate": 1.8000000000000001e-06,
"loss": 1.0169,
"step": 10
},
{
"epoch": 0.37,
"eval_loss": 0.895382285118103,
"eval_runtime": 335.7721,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.119,
"eval_wer": 57.14285714285714,
"step": 10
},
{
"epoch": 0.56,
"learning_rate": 2.6e-06,
"loss": 0.8095,
"step": 15
},
{
"epoch": 0.56,
"eval_loss": 0.7291234135627747,
"eval_runtime": 341.1764,
"eval_samples_per_second": 0.234,
"eval_steps_per_second": 0.117,
"eval_wer": 59.707873174207336,
"step": 15
},
{
"epoch": 0.74,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.6586,
"step": 20
},
{
"epoch": 0.74,
"eval_loss": 0.5784670114517212,
"eval_runtime": 208.1203,
"eval_samples_per_second": 0.384,
"eval_steps_per_second": 0.192,
"eval_wer": 83.07801923762024,
"step": 20
},
{
"epoch": 0.93,
"learning_rate": 4.600000000000001e-06,
"loss": 0.5849,
"step": 25
},
{
"epoch": 0.93,
"eval_loss": 0.5224391222000122,
"eval_runtime": 251.382,
"eval_samples_per_second": 0.318,
"eval_steps_per_second": 0.159,
"eval_wer": 91.73494834342715,
"step": 25
},
{
"epoch": 1.11,
"learning_rate": 5.600000000000001e-06,
"loss": 0.4623,
"step": 30
},
{
"epoch": 1.11,
"eval_loss": 0.48158684372901917,
"eval_runtime": 279.2357,
"eval_samples_per_second": 0.286,
"eval_steps_per_second": 0.143,
"eval_wer": 72.99608122550765,
"step": 30
},
{
"epoch": 1.3,
"learning_rate": 6.600000000000001e-06,
"loss": 0.4459,
"step": 35
},
{
"epoch": 1.3,
"eval_loss": 0.4286937713623047,
"eval_runtime": 232.3372,
"eval_samples_per_second": 0.344,
"eval_steps_per_second": 0.172,
"eval_wer": 76.0242251514072,
"step": 35
},
{
"epoch": 1.48,
"learning_rate": 7.600000000000001e-06,
"loss": 0.3496,
"step": 40
},
{
"epoch": 1.48,
"eval_loss": 0.4074183404445648,
"eval_runtime": 289.554,
"eval_samples_per_second": 0.276,
"eval_steps_per_second": 0.138,
"eval_wer": 48.6284289276808,
"step": 40
},
{
"epoch": 1.67,
"learning_rate": 8.6e-06,
"loss": 0.3964,
"step": 45
},
{
"epoch": 1.67,
"eval_loss": 0.3671049177646637,
"eval_runtime": 356.7998,
"eval_samples_per_second": 0.224,
"eval_steps_per_second": 0.112,
"eval_wer": 57.67723548272177,
"step": 45
},
{
"epoch": 1.85,
"learning_rate": 9.600000000000001e-06,
"loss": 0.3506,
"step": 50
},
{
"epoch": 1.85,
"eval_loss": 0.37803885340690613,
"eval_runtime": 338.1245,
"eval_samples_per_second": 0.237,
"eval_steps_per_second": 0.118,
"eval_wer": 49.76843605272533,
"step": 50
},
{
"epoch": 2.04,
"learning_rate": 9.96842105263158e-06,
"loss": 0.377,
"step": 55
},
{
"epoch": 2.04,
"eval_loss": 0.35833048820495605,
"eval_runtime": 344.9289,
"eval_samples_per_second": 0.232,
"eval_steps_per_second": 0.116,
"eval_wer": 56.21660135375846,
"step": 55
},
{
"epoch": 2.22,
"learning_rate": 9.915789473684211e-06,
"loss": 0.2766,
"step": 60
},
{
"epoch": 2.22,
"eval_loss": 0.37928158044815063,
"eval_runtime": 350.8056,
"eval_samples_per_second": 0.228,
"eval_steps_per_second": 0.114,
"eval_wer": 83.0423940149626,
"step": 60
},
{
"epoch": 2.41,
"learning_rate": 9.863157894736843e-06,
"loss": 0.2522,
"step": 65
},
{
"epoch": 2.41,
"eval_loss": 0.3648552894592285,
"eval_runtime": 343.605,
"eval_samples_per_second": 0.233,
"eval_steps_per_second": 0.116,
"eval_wer": 69.89668685429284,
"step": 65
},
{
"epoch": 2.59,
"learning_rate": 9.810526315789475e-06,
"loss": 0.2793,
"step": 70
},
{
"epoch": 2.59,
"eval_loss": 0.36223381757736206,
"eval_runtime": 325.8454,
"eval_samples_per_second": 0.246,
"eval_steps_per_second": 0.123,
"eval_wer": 48.37905236907731,
"step": 70
},
{
"epoch": 2.78,
"learning_rate": 9.757894736842106e-06,
"loss": 0.27,
"step": 75
},
{
"epoch": 2.78,
"eval_loss": 0.38781291246414185,
"eval_runtime": 331.2847,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.121,
"eval_wer": 48.09405058781617,
"step": 75
},
{
"epoch": 2.96,
"learning_rate": 9.715789473684212e-06,
"loss": 0.2624,
"step": 80
},
{
"epoch": 2.96,
"eval_loss": 0.38262635469436646,
"eval_runtime": 329.4177,
"eval_samples_per_second": 0.243,
"eval_steps_per_second": 0.121,
"eval_wer": 47.167794798717495,
"step": 80
},
{
"epoch": 3.15,
"learning_rate": 9.663157894736843e-06,
"loss": 0.225,
"step": 85
},
{
"epoch": 3.15,
"eval_loss": 0.38125258684158325,
"eval_runtime": 310.0949,
"eval_samples_per_second": 0.258,
"eval_steps_per_second": 0.129,
"eval_wer": 54.39971499821874,
"step": 85
},
{
"epoch": 3.33,
"learning_rate": 9.610526315789475e-06,
"loss": 0.2062,
"step": 90
},
{
"epoch": 3.33,
"eval_loss": 0.3962409198284149,
"eval_runtime": 299.3235,
"eval_samples_per_second": 0.267,
"eval_steps_per_second": 0.134,
"eval_wer": 72.92483078019238,
"step": 90
},
{
"epoch": 3.52,
"learning_rate": 9.557894736842107e-06,
"loss": 0.192,
"step": 95
},
{
"epoch": 3.52,
"eval_loss": 0.3863430619239807,
"eval_runtime": 332.6447,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 59.17349483434271,
"step": 95
},
{
"epoch": 3.7,
"learning_rate": 9.505263157894738e-06,
"loss": 0.224,
"step": 100
},
{
"epoch": 3.7,
"eval_loss": 0.38708847761154175,
"eval_runtime": 330.6389,
"eval_samples_per_second": 0.242,
"eval_steps_per_second": 0.121,
"eval_wer": 54.39971499821874,
"step": 100
},
{
"epoch": 3.89,
"learning_rate": 9.452631578947368e-06,
"loss": 0.2127,
"step": 105
},
{
"epoch": 3.89,
"eval_loss": 0.41648903489112854,
"eval_runtime": 357.2142,
"eval_samples_per_second": 0.224,
"eval_steps_per_second": 0.112,
"eval_wer": 70.07481296758104,
"step": 105
},
{
"epoch": 4.07,
"learning_rate": 9.4e-06,
"loss": 0.2033,
"step": 110
},
{
"epoch": 4.07,
"eval_loss": 0.39047971367836,
"eval_runtime": 316.4665,
"eval_samples_per_second": 0.253,
"eval_steps_per_second": 0.126,
"eval_wer": 50.69469184182401,
"step": 110
},
{
"epoch": 4.26,
"learning_rate": 9.347368421052633e-06,
"loss": 0.1426,
"step": 115
},
{
"epoch": 4.26,
"eval_loss": 0.41405850648880005,
"eval_runtime": 326.6943,
"eval_samples_per_second": 0.245,
"eval_steps_per_second": 0.122,
"eval_wer": 44.53152832205201,
"step": 115
},
{
"epoch": 4.44,
"learning_rate": 9.294736842105265e-06,
"loss": 0.1737,
"step": 120
},
{
"epoch": 4.44,
"eval_loss": 0.4162759780883789,
"eval_runtime": 334.7907,
"eval_samples_per_second": 0.239,
"eval_steps_per_second": 0.119,
"eval_wer": 47.52404702529391,
"step": 120
},
{
"epoch": 4.63,
"learning_rate": 9.242105263157896e-06,
"loss": 0.1404,
"step": 125
},
{
"epoch": 4.63,
"eval_loss": 0.4179295599460602,
"eval_runtime": 313.7669,
"eval_samples_per_second": 0.255,
"eval_steps_per_second": 0.127,
"eval_wer": 47.951549697185605,
"step": 125
},
{
"epoch": 4.81,
"learning_rate": 9.189473684210526e-06,
"loss": 0.1738,
"step": 130
},
{
"epoch": 4.81,
"eval_loss": 0.4070858061313629,
"eval_runtime": 322.5875,
"eval_samples_per_second": 0.248,
"eval_steps_per_second": 0.124,
"eval_wer": 43.747773423583894,
"step": 130
},
{
"epoch": 5.0,
"learning_rate": 9.136842105263158e-06,
"loss": 0.1852,
"step": 135
},
{
"epoch": 5.0,
"eval_loss": 0.3889687657356262,
"eval_runtime": 331.8197,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.121,
"eval_wer": 65.94228713929462,
"step": 135
},
{
"epoch": 5.19,
"learning_rate": 9.08421052631579e-06,
"loss": 0.1111,
"step": 140
},
{
"epoch": 5.19,
"eval_loss": 0.4467521607875824,
"eval_runtime": 340.6568,
"eval_samples_per_second": 0.235,
"eval_steps_per_second": 0.117,
"eval_wer": 47.5952974706092,
"step": 140
},
{
"epoch": 5.37,
"learning_rate": 9.031578947368423e-06,
"loss": 0.1642,
"step": 145
},
{
"epoch": 5.37,
"eval_loss": 0.47300252318382263,
"eval_runtime": 330.5579,
"eval_samples_per_second": 0.242,
"eval_steps_per_second": 0.121,
"eval_wer": 53.152832205201285,
"step": 145
},
{
"epoch": 5.56,
"learning_rate": 8.978947368421055e-06,
"loss": 0.1552,
"step": 150
},
{
"epoch": 5.56,
"eval_loss": 0.44687384366989136,
"eval_runtime": 317.2285,
"eval_samples_per_second": 0.252,
"eval_steps_per_second": 0.126,
"eval_wer": 65.62166013537585,
"step": 150
},
{
"epoch": 5.74,
"learning_rate": 8.926315789473685e-06,
"loss": 0.1497,
"step": 155
},
{
"epoch": 5.74,
"eval_loss": 0.44148778915405273,
"eval_runtime": 315.1611,
"eval_samples_per_second": 0.254,
"eval_steps_per_second": 0.127,
"eval_wer": 48.05842536515853,
"step": 155
},
{
"epoch": 5.93,
"learning_rate": 8.873684210526316e-06,
"loss": 0.1419,
"step": 160
},
{
"epoch": 5.93,
"eval_loss": 0.4392489492893219,
"eval_runtime": 336.253,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.119,
"eval_wer": 43.9258995368721,
"step": 160
},
{
"epoch": 6.11,
"learning_rate": 8.821052631578948e-06,
"loss": 0.1176,
"step": 165
},
{
"epoch": 6.11,
"eval_loss": 0.44891494512557983,
"eval_runtime": 328.7909,
"eval_samples_per_second": 0.243,
"eval_steps_per_second": 0.122,
"eval_wer": 41.1827573922337,
"step": 165
},
{
"epoch": 6.3,
"learning_rate": 8.76842105263158e-06,
"loss": 0.0996,
"step": 170
},
{
"epoch": 6.3,
"eval_loss": 0.505344569683075,
"eval_runtime": 338.587,
"eval_samples_per_second": 0.236,
"eval_steps_per_second": 0.118,
"eval_wer": 46.81154257214107,
"step": 170
},
{
"epoch": 6.48,
"learning_rate": 8.715789473684211e-06,
"loss": 0.1376,
"step": 175
},
{
"epoch": 6.48,
"eval_loss": 0.48324888944625854,
"eval_runtime": 326.4874,
"eval_samples_per_second": 0.245,
"eval_steps_per_second": 0.123,
"eval_wer": 52.29782686141788,
"step": 175
},
{
"epoch": 6.67,
"learning_rate": 8.663157894736843e-06,
"loss": 0.1093,
"step": 180
},
{
"epoch": 6.67,
"eval_loss": 0.4419935643672943,
"eval_runtime": 340.888,
"eval_samples_per_second": 0.235,
"eval_steps_per_second": 0.117,
"eval_wer": 54.39971499821874,
"step": 180
},
{
"epoch": 6.85,
"learning_rate": 8.610526315789474e-06,
"loss": 0.1325,
"step": 185
},
{
"epoch": 6.85,
"eval_loss": 0.4463180899620056,
"eval_runtime": 333.0453,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 45.92091200570004,
"step": 185
},
{
"epoch": 7.04,
"learning_rate": 8.557894736842106e-06,
"loss": 0.1151,
"step": 190
},
{
"epoch": 7.04,
"eval_loss": 0.4659479558467865,
"eval_runtime": 321.4982,
"eval_samples_per_second": 0.249,
"eval_steps_per_second": 0.124,
"eval_wer": 46.09903811898825,
"step": 190
},
{
"epoch": 7.22,
"learning_rate": 8.505263157894738e-06,
"loss": 0.0892,
"step": 195
},
{
"epoch": 7.22,
"eval_loss": 0.4642786979675293,
"eval_runtime": 304.1157,
"eval_samples_per_second": 0.263,
"eval_steps_per_second": 0.132,
"eval_wer": 61.9878874242964,
"step": 195
},
{
"epoch": 7.41,
"learning_rate": 8.45263157894737e-06,
"loss": 0.0819,
"step": 200
},
{
"epoch": 7.41,
"eval_loss": 0.4606548845767975,
"eval_runtime": 314.9466,
"eval_samples_per_second": 0.254,
"eval_steps_per_second": 0.127,
"eval_wer": 49.162807267545425,
"step": 200
},
{
"epoch": 7.59,
"learning_rate": 8.400000000000001e-06,
"loss": 0.0881,
"step": 205
},
{
"epoch": 7.59,
"eval_loss": 0.49518561363220215,
"eval_runtime": 333.9165,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 47.167794798717495,
"step": 205
},
{
"epoch": 7.78,
"learning_rate": 8.347368421052633e-06,
"loss": 0.0902,
"step": 210
},
{
"epoch": 7.78,
"eval_loss": 0.48233914375305176,
"eval_runtime": 323.2853,
"eval_samples_per_second": 0.247,
"eval_steps_per_second": 0.124,
"eval_wer": 47.98717491984325,
"step": 210
},
{
"epoch": 7.96,
"learning_rate": 8.294736842105264e-06,
"loss": 0.1125,
"step": 215
},
{
"epoch": 7.96,
"eval_loss": 0.48526009917259216,
"eval_runtime": 331.099,
"eval_samples_per_second": 0.242,
"eval_steps_per_second": 0.121,
"eval_wer": 59.45849661560385,
"step": 215
},
{
"epoch": 8.15,
"learning_rate": 8.242105263157896e-06,
"loss": 0.0668,
"step": 220
},
{
"epoch": 8.15,
"eval_loss": 0.5275024175643921,
"eval_runtime": 310.2348,
"eval_samples_per_second": 0.258,
"eval_steps_per_second": 0.129,
"eval_wer": 58.42536515853224,
"step": 220
},
{
"epoch": 8.33,
"learning_rate": 8.189473684210527e-06,
"loss": 0.0723,
"step": 225
},
{
"epoch": 8.33,
"eval_loss": 0.532504141330719,
"eval_runtime": 331.9501,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.121,
"eval_wer": 46.775917349483436,
"step": 225
},
{
"epoch": 8.52,
"learning_rate": 8.136842105263159e-06,
"loss": 0.0825,
"step": 230
},
{
"epoch": 8.52,
"eval_loss": 0.5000892877578735,
"eval_runtime": 328.9744,
"eval_samples_per_second": 0.243,
"eval_steps_per_second": 0.122,
"eval_wer": 44.21090131813324,
"step": 230
},
{
"epoch": 8.7,
"learning_rate": 8.08421052631579e-06,
"loss": 0.0859,
"step": 235
},
{
"epoch": 8.7,
"eval_loss": 0.5018569827079773,
"eval_runtime": 321.0271,
"eval_samples_per_second": 0.249,
"eval_steps_per_second": 0.125,
"eval_wer": 55.68222301389384,
"step": 235
},
{
"epoch": 8.89,
"learning_rate": 8.03157894736842e-06,
"loss": 0.0838,
"step": 240
},
{
"epoch": 8.89,
"eval_loss": 0.5195188522338867,
"eval_runtime": 326.5564,
"eval_samples_per_second": 0.245,
"eval_steps_per_second": 0.122,
"eval_wer": 50.65906661916637,
"step": 240
},
{
"epoch": 9.07,
"learning_rate": 7.978947368421052e-06,
"loss": 0.0634,
"step": 245
},
{
"epoch": 9.07,
"eval_loss": 0.5121615529060364,
"eval_runtime": 333.3682,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 45.42215888849305,
"step": 245
},
{
"epoch": 9.26,
"learning_rate": 7.926315789473686e-06,
"loss": 0.0611,
"step": 250
},
{
"epoch": 9.26,
"eval_loss": 0.53472900390625,
"eval_runtime": 334.7977,
"eval_samples_per_second": 0.239,
"eval_steps_per_second": 0.119,
"eval_wer": 45.38653366583541,
"step": 250
},
{
"epoch": 9.44,
"learning_rate": 7.873684210526317e-06,
"loss": 0.0742,
"step": 255
},
{
"epoch": 9.44,
"eval_loss": 0.5380967855453491,
"eval_runtime": 341.6365,
"eval_samples_per_second": 0.234,
"eval_steps_per_second": 0.117,
"eval_wer": 46.170288564303526,
"step": 255
},
{
"epoch": 9.63,
"learning_rate": 7.821052631578949e-06,
"loss": 0.0544,
"step": 260
},
{
"epoch": 9.63,
"eval_loss": 0.5325397253036499,
"eval_runtime": 344.44,
"eval_samples_per_second": 0.232,
"eval_steps_per_second": 0.116,
"eval_wer": 52.08407552547203,
"step": 260
},
{
"epoch": 9.81,
"learning_rate": 7.768421052631579e-06,
"loss": 0.0832,
"step": 265
},
{
"epoch": 9.81,
"eval_loss": 0.5298484563827515,
"eval_runtime": 338.7288,
"eval_samples_per_second": 0.236,
"eval_steps_per_second": 0.118,
"eval_wer": 46.81154257214107,
"step": 265
},
{
"epoch": 10.0,
"learning_rate": 7.71578947368421e-06,
"loss": 0.079,
"step": 270
},
{
"epoch": 10.0,
"eval_loss": 0.5149779319763184,
"eval_runtime": 322.201,
"eval_samples_per_second": 0.248,
"eval_steps_per_second": 0.124,
"eval_wer": 49.447809048806555,
"step": 270
},
{
"epoch": 10.19,
"learning_rate": 7.663157894736842e-06,
"loss": 0.0692,
"step": 275
},
{
"epoch": 10.19,
"eval_loss": 0.5358998775482178,
"eval_runtime": 304.2081,
"eval_samples_per_second": 0.263,
"eval_steps_per_second": 0.131,
"eval_wer": 50.3384396152476,
"step": 275
},
{
"epoch": 10.37,
"learning_rate": 7.610526315789474e-06,
"loss": 0.0549,
"step": 280
},
{
"epoch": 10.37,
"eval_loss": 0.5207683444023132,
"eval_runtime": 333.629,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 49.12718204488778,
"step": 280
},
{
"epoch": 10.56,
"learning_rate": 7.557894736842106e-06,
"loss": 0.0533,
"step": 285
},
{
"epoch": 10.56,
"eval_loss": 0.528590202331543,
"eval_runtime": 332.7917,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 47.02529390808692,
"step": 285
},
{
"epoch": 10.74,
"learning_rate": 7.505263157894738e-06,
"loss": 0.0821,
"step": 290
},
{
"epoch": 10.74,
"eval_loss": 0.51960289478302,
"eval_runtime": 332.8933,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 46.24153900961881,
"step": 290
},
{
"epoch": 10.93,
"learning_rate": 7.4526315789473695e-06,
"loss": 0.0672,
"step": 295
},
{
"epoch": 10.93,
"eval_loss": 0.5139871835708618,
"eval_runtime": 342.8201,
"eval_samples_per_second": 0.233,
"eval_steps_per_second": 0.117,
"eval_wer": 46.45529034556466,
"step": 295
},
{
"epoch": 11.11,
"learning_rate": 7.4e-06,
"loss": 0.0694,
"step": 300
},
{
"epoch": 11.11,
"eval_loss": 0.5785766243934631,
"eval_runtime": 334.4012,
"eval_samples_per_second": 0.239,
"eval_steps_per_second": 0.12,
"eval_wer": 44.92340577128607,
"step": 300
},
{
"epoch": 11.3,
"learning_rate": 7.347368421052632e-06,
"loss": 0.09,
"step": 305
},
{
"epoch": 11.3,
"eval_loss": 0.584839940071106,
"eval_runtime": 343.2341,
"eval_samples_per_second": 0.233,
"eval_steps_per_second": 0.117,
"eval_wer": 49.59030993943712,
"step": 305
},
{
"epoch": 11.48,
"learning_rate": 7.2947368421052636e-06,
"loss": 0.0824,
"step": 310
},
{
"epoch": 11.48,
"eval_loss": 0.5461788773536682,
"eval_runtime": 330.7912,
"eval_samples_per_second": 0.242,
"eval_steps_per_second": 0.121,
"eval_wer": 45.24403277520484,
"step": 310
},
{
"epoch": 11.67,
"learning_rate": 7.242105263157896e-06,
"loss": 0.0805,
"step": 315
},
{
"epoch": 11.67,
"eval_loss": 0.537391185760498,
"eval_runtime": 352.6332,
"eval_samples_per_second": 0.227,
"eval_steps_per_second": 0.113,
"eval_wer": 54.25721410758817,
"step": 315
},
{
"epoch": 11.85,
"learning_rate": 7.189473684210527e-06,
"loss": 0.0688,
"step": 320
},
{
"epoch": 11.85,
"eval_loss": 0.5511082410812378,
"eval_runtime": 336.5436,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.119,
"eval_wer": 42.8927680798005,
"step": 320
},
{
"epoch": 12.04,
"learning_rate": 7.1368421052631585e-06,
"loss": 0.048,
"step": 325
},
{
"epoch": 12.04,
"eval_loss": 0.5636075139045715,
"eval_runtime": 319.3896,
"eval_samples_per_second": 0.25,
"eval_steps_per_second": 0.125,
"eval_wer": 44.81653010331315,
"step": 325
},
{
"epoch": 12.22,
"learning_rate": 7.08421052631579e-06,
"loss": 0.054,
"step": 330
},
{
"epoch": 12.22,
"eval_loss": 0.5698742866516113,
"eval_runtime": 345.1573,
"eval_samples_per_second": 0.232,
"eval_steps_per_second": 0.116,
"eval_wer": 46.88279301745636,
"step": 330
},
{
"epoch": 12.41,
"learning_rate": 7.031578947368422e-06,
"loss": 0.0535,
"step": 335
},
{
"epoch": 12.41,
"eval_loss": 0.5786362886428833,
"eval_runtime": 331.7186,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.121,
"eval_wer": 42.60776629853937,
"step": 335
},
{
"epoch": 12.59,
"learning_rate": 6.9789473684210525e-06,
"loss": 0.0644,
"step": 340
},
{
"epoch": 12.59,
"eval_loss": 0.5574811697006226,
"eval_runtime": 339.8046,
"eval_samples_per_second": 0.235,
"eval_steps_per_second": 0.118,
"eval_wer": 45.956537228357675,
"step": 340
},
{
"epoch": 12.78,
"learning_rate": 6.926315789473684e-06,
"loss": 0.0618,
"step": 345
},
{
"epoch": 12.78,
"eval_loss": 0.5477833151817322,
"eval_runtime": 337.626,
"eval_samples_per_second": 0.237,
"eval_steps_per_second": 0.118,
"eval_wer": 46.918418240114,
"step": 345
},
{
"epoch": 12.96,
"learning_rate": 6.873684210526317e-06,
"loss": 0.0832,
"step": 350
},
{
"epoch": 12.96,
"eval_loss": 0.6038811802864075,
"eval_runtime": 336.2086,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.119,
"eval_wer": 44.068400427502674,
"step": 350
},
{
"epoch": 13.15,
"learning_rate": 6.821052631578948e-06,
"loss": 0.0592,
"step": 355
},
{
"epoch": 13.15,
"eval_loss": 0.6127667427062988,
"eval_runtime": 331.5309,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.121,
"eval_wer": 42.32276451727823,
"step": 355
},
{
"epoch": 13.33,
"learning_rate": 6.76842105263158e-06,
"loss": 0.0619,
"step": 360
},
{
"epoch": 13.33,
"eval_loss": 0.591279149055481,
"eval_runtime": 332.3235,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.12,
"eval_wer": 45.03028143925899,
"step": 360
},
{
"epoch": 13.52,
"learning_rate": 6.715789473684211e-06,
"loss": 0.0998,
"step": 365
},
{
"epoch": 13.52,
"eval_loss": 0.5318233966827393,
"eval_runtime": 332.2782,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.12,
"eval_wer": 42.75026718916993,
"step": 365
},
{
"epoch": 13.7,
"learning_rate": 6.663157894736842e-06,
"loss": 0.05,
"step": 370
},
{
"epoch": 13.7,
"eval_loss": 0.5774287581443787,
"eval_runtime": 329.9705,
"eval_samples_per_second": 0.242,
"eval_steps_per_second": 0.121,
"eval_wer": 42.82151763448522,
"step": 370
},
{
"epoch": 13.89,
"learning_rate": 6.610526315789474e-06,
"loss": 0.1014,
"step": 375
},
{
"epoch": 13.89,
"eval_loss": 0.5849052667617798,
"eval_runtime": 328.3008,
"eval_samples_per_second": 0.244,
"eval_steps_per_second": 0.122,
"eval_wer": 48.05842536515853,
"step": 375
},
{
"epoch": 14.07,
"learning_rate": 6.557894736842106e-06,
"loss": 0.0494,
"step": 380
},
{
"epoch": 14.07,
"eval_loss": 0.5518860816955566,
"eval_runtime": 318.5974,
"eval_samples_per_second": 0.251,
"eval_steps_per_second": 0.126,
"eval_wer": 48.69967937299608,
"step": 380
},
{
"epoch": 14.26,
"learning_rate": 6.505263157894738e-06,
"loss": 0.0456,
"step": 385
},
{
"epoch": 14.26,
"eval_loss": 0.5957409739494324,
"eval_runtime": 335.9574,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.119,
"eval_wer": 42.75026718916993,
"step": 385
},
{
"epoch": 14.44,
"learning_rate": 6.452631578947369e-06,
"loss": 0.0957,
"step": 390
},
{
"epoch": 14.44,
"eval_loss": 0.5850934982299805,
"eval_runtime": 347.6573,
"eval_samples_per_second": 0.23,
"eval_steps_per_second": 0.115,
"eval_wer": 48.80655504096901,
"step": 390
},
{
"epoch": 14.63,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.0514,
"step": 395
},
{
"epoch": 14.63,
"eval_loss": 0.6157152652740479,
"eval_runtime": 332.3513,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.12,
"eval_wer": 49.661560384752406,
"step": 395
},
{
"epoch": 14.81,
"learning_rate": 6.347368421052632e-06,
"loss": 0.1045,
"step": 400
},
{
"epoch": 14.81,
"eval_loss": 0.5892783999443054,
"eval_runtime": 341.0177,
"eval_samples_per_second": 0.235,
"eval_steps_per_second": 0.117,
"eval_wer": 52.6540790879943,
"step": 400
},
{
"epoch": 15.0,
"learning_rate": 6.294736842105264e-06,
"loss": 0.0485,
"step": 405
},
{
"epoch": 15.0,
"eval_loss": 0.5818936228752136,
"eval_runtime": 334.6692,
"eval_samples_per_second": 0.239,
"eval_steps_per_second": 0.12,
"eval_wer": 50.9440684004275,
"step": 405
},
{
"epoch": 15.19,
"learning_rate": 6.242105263157895e-06,
"loss": 0.0705,
"step": 410
},
{
"epoch": 15.19,
"eval_loss": 0.5869933366775513,
"eval_runtime": 338.9314,
"eval_samples_per_second": 0.236,
"eval_steps_per_second": 0.118,
"eval_wer": 45.77841111506947,
"step": 410
},
{
"epoch": 15.37,
"learning_rate": 6.189473684210526e-06,
"loss": 0.0354,
"step": 415
},
{
"epoch": 15.37,
"eval_loss": 0.620836615562439,
"eval_runtime": 340.9346,
"eval_samples_per_second": 0.235,
"eval_steps_per_second": 0.117,
"eval_wer": 47.20342002137513,
"step": 415
},
{
"epoch": 15.56,
"learning_rate": 6.136842105263159e-06,
"loss": 0.0366,
"step": 420
},
{
"epoch": 15.56,
"eval_loss": 0.6018834114074707,
"eval_runtime": 339.0784,
"eval_samples_per_second": 0.236,
"eval_steps_per_second": 0.118,
"eval_wer": 46.02778767367296,
"step": 420
},
{
"epoch": 15.74,
"learning_rate": 6.08421052631579e-06,
"loss": 0.0422,
"step": 425
},
{
"epoch": 15.74,
"eval_loss": 0.5831278562545776,
"eval_runtime": 332.3516,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.12,
"eval_wer": 44.92340577128607,
"step": 425
},
{
"epoch": 15.93,
"learning_rate": 6.031578947368422e-06,
"loss": 0.1133,
"step": 430
},
{
"epoch": 15.93,
"eval_loss": 0.6038104295730591,
"eval_runtime": 333.9842,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 44.13965087281795,
"step": 430
},
{
"epoch": 16.11,
"learning_rate": 5.978947368421053e-06,
"loss": 0.0515,
"step": 435
},
{
"epoch": 16.11,
"eval_loss": 0.6272233724594116,
"eval_runtime": 331.7109,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.121,
"eval_wer": 43.53402208763805,
"step": 435
},
{
"epoch": 16.3,
"learning_rate": 5.9263157894736844e-06,
"loss": 0.0709,
"step": 440
},
{
"epoch": 16.3,
"eval_loss": 0.6419104337692261,
"eval_runtime": 336.7239,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.119,
"eval_wer": 45.92091200570004,
"step": 440
},
{
"epoch": 16.48,
"learning_rate": 5.873684210526316e-06,
"loss": 0.0978,
"step": 445
},
{
"epoch": 16.48,
"eval_loss": 0.6428846120834351,
"eval_runtime": 334.722,
"eval_samples_per_second": 0.239,
"eval_steps_per_second": 0.12,
"eval_wer": 46.59779123619522,
"step": 445
},
{
"epoch": 16.67,
"learning_rate": 5.8210526315789486e-06,
"loss": 0.052,
"step": 450
},
{
"epoch": 16.67,
"eval_loss": 0.6158877611160278,
"eval_runtime": 340.1528,
"eval_samples_per_second": 0.235,
"eval_steps_per_second": 0.118,
"eval_wer": 45.2084075525472,
"step": 450
},
{
"epoch": 16.85,
"learning_rate": 5.76842105263158e-06,
"loss": 0.0508,
"step": 455
},
{
"epoch": 16.85,
"eval_loss": 0.631538987159729,
"eval_runtime": 334.4872,
"eval_samples_per_second": 0.239,
"eval_steps_per_second": 0.12,
"eval_wer": 45.24403277520484,
"step": 455
},
{
"epoch": 17.04,
"learning_rate": 5.715789473684211e-06,
"loss": 0.0453,
"step": 460
},
{
"epoch": 17.04,
"eval_loss": 0.6536934971809387,
"eval_runtime": 327.9543,
"eval_samples_per_second": 0.244,
"eval_steps_per_second": 0.122,
"eval_wer": 48.87780548628429,
"step": 460
},
{
"epoch": 17.22,
"learning_rate": 5.663157894736843e-06,
"loss": 0.0454,
"step": 465
},
{
"epoch": 17.22,
"eval_loss": 0.6028529405593872,
"eval_runtime": 337.2385,
"eval_samples_per_second": 0.237,
"eval_steps_per_second": 0.119,
"eval_wer": 48.770929818311366,
"step": 465
},
{
"epoch": 17.41,
"learning_rate": 5.610526315789474e-06,
"loss": 0.0662,
"step": 470
},
{
"epoch": 17.41,
"eval_loss": 0.5731960535049438,
"eval_runtime": 331.8614,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.121,
"eval_wer": 45.885286783042396,
"step": 470
},
{
"epoch": 17.59,
"learning_rate": 5.557894736842105e-06,
"loss": 0.0365,
"step": 475
},
{
"epoch": 17.59,
"eval_loss": 0.6006522178649902,
"eval_runtime": 320.1815,
"eval_samples_per_second": 0.25,
"eval_steps_per_second": 0.125,
"eval_wer": 43.89027431421446,
"step": 475
},
{
"epoch": 17.78,
"learning_rate": 5.505263157894737e-06,
"loss": 0.0728,
"step": 480
},
{
"epoch": 17.78,
"eval_loss": 0.6429422497749329,
"eval_runtime": 333.1015,
"eval_samples_per_second": 0.24,
"eval_steps_per_second": 0.12,
"eval_wer": 44.67402921268258,
"step": 480
},
{
"epoch": 17.96,
"learning_rate": 5.452631578947369e-06,
"loss": 0.0334,
"step": 485
},
{
"epoch": 17.96,
"eval_loss": 0.648144543170929,
"eval_runtime": 320.2927,
"eval_samples_per_second": 0.25,
"eval_steps_per_second": 0.125,
"eval_wer": 41.11150694691842,
"step": 485
}
],
"max_steps": 1000,
"num_train_epochs": 38,
"total_flos": 1.6475807121408e+19,
"trial_name": null,
"trial_params": null
}