ivanlau's picture
End of training
45d3667
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 18300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 47.84418869018555,
"eval_runtime": 186.8063,
"eval_samples_per_second": 19.587,
"eval_steps_per_second": 1.226,
"eval_wer": 1.0,
"step": 183
},
{
"epoch": 2.0,
"eval_loss": 6.310945510864258,
"eval_runtime": 163.2985,
"eval_samples_per_second": 22.407,
"eval_steps_per_second": 1.402,
"eval_wer": 1.0,
"step": 366
},
{
"epoch": 2.73,
"learning_rate": 0.00029699999999999996,
"loss": 41.8902,
"step": 500
},
{
"epoch": 3.0,
"eval_loss": 6.239192008972168,
"eval_runtime": 163.0201,
"eval_samples_per_second": 22.445,
"eval_steps_per_second": 1.405,
"eval_wer": 1.0,
"step": 549
},
{
"epoch": 4.0,
"eval_loss": 5.973925590515137,
"eval_runtime": 163.0214,
"eval_samples_per_second": 22.445,
"eval_steps_per_second": 1.405,
"eval_wer": 1.1123227917121048,
"step": 732
},
{
"epoch": 5.0,
"eval_loss": 4.901411056518555,
"eval_runtime": 162.8715,
"eval_samples_per_second": 22.466,
"eval_steps_per_second": 1.406,
"eval_wer": 1.9473827699018538,
"step": 915
},
{
"epoch": 5.46,
"learning_rate": 0.00018834586466165413,
"loss": 5.5817,
"step": 1000
},
{
"epoch": 6.0,
"eval_loss": 3.9892334938049316,
"eval_runtime": 163.2053,
"eval_samples_per_second": 22.42,
"eval_steps_per_second": 1.403,
"eval_wer": 1.0188113413304254,
"step": 1098
},
{
"epoch": 7.0,
"eval_loss": 3.5080456733703613,
"eval_runtime": 162.7362,
"eval_samples_per_second": 22.484,
"eval_steps_per_second": 1.407,
"eval_wer": 1.0103598691384952,
"step": 1281
},
{
"epoch": 8.0,
"eval_loss": 3.0796852111816406,
"eval_runtime": 164.2666,
"eval_samples_per_second": 22.275,
"eval_steps_per_second": 1.394,
"eval_wer": 0.9904580152671756,
"step": 1464
},
{
"epoch": 8.2,
"learning_rate": 7.556390977443609e-05,
"loss": 3.5579,
"step": 1500
},
{
"epoch": 9.0,
"eval_loss": 2.8110806941986084,
"eval_runtime": 168.541,
"eval_samples_per_second": 21.71,
"eval_steps_per_second": 1.359,
"eval_wer": 0.9836423118865867,
"step": 1647
},
{
"epoch": 10.0,
"eval_loss": 2.67260479927063,
"eval_runtime": 165.7448,
"eval_samples_per_second": 22.076,
"eval_steps_per_second": 1.382,
"eval_wer": 0.9814612868047983,
"step": 1830
},
{
"epoch": 10.93,
"learning_rate": 0.00027480337078651684,
"loss": 2.7771,
"step": 2000
},
{
"epoch": 11.0,
"eval_loss": 2.7177391052246094,
"eval_runtime": 191.8821,
"eval_samples_per_second": 19.069,
"eval_steps_per_second": 1.193,
"eval_wer": 0.9809160305343512,
"step": 2013
},
{
"epoch": 12.0,
"eval_loss": 2.3581743240356445,
"eval_runtime": 168.9095,
"eval_samples_per_second": 21.662,
"eval_steps_per_second": 1.356,
"eval_wer": 0.9691930207197382,
"step": 2196
},
{
"epoch": 13.0,
"eval_loss": 2.1708498001098633,
"eval_runtime": 167.2979,
"eval_samples_per_second": 21.871,
"eval_steps_per_second": 1.369,
"eval_wer": 0.9757360959651036,
"step": 2379
},
{
"epoch": 13.66,
"learning_rate": 0.000266376404494382,
"loss": 2.3488,
"step": 2500
},
{
"epoch": 14.0,
"eval_loss": 2.049051284790039,
"eval_runtime": 169.536,
"eval_samples_per_second": 21.582,
"eval_steps_per_second": 1.351,
"eval_wer": 0.9525627044711014,
"step": 2562
},
{
"epoch": 15.0,
"eval_loss": 1.8517801761627197,
"eval_runtime": 166.9821,
"eval_samples_per_second": 21.913,
"eval_steps_per_second": 1.371,
"eval_wer": 0.9378407851690295,
"step": 2745
},
{
"epoch": 16.0,
"eval_loss": 1.6844531297683716,
"eval_runtime": 166.3633,
"eval_samples_per_second": 21.994,
"eval_steps_per_second": 1.377,
"eval_wer": 0.9285714285714286,
"step": 2928
},
{
"epoch": 16.39,
"learning_rate": 0.00025794943820224716,
"loss": 1.7859,
"step": 3000
},
{
"epoch": 17.0,
"eval_loss": 1.641157627105713,
"eval_runtime": 165.2481,
"eval_samples_per_second": 22.142,
"eval_steps_per_second": 1.386,
"eval_wer": 0.9280261723009815,
"step": 3111
},
{
"epoch": 18.0,
"eval_loss": 1.5487942695617676,
"eval_runtime": 166.3028,
"eval_samples_per_second": 22.002,
"eval_steps_per_second": 1.377,
"eval_wer": 0.9034896401308615,
"step": 3294
},
{
"epoch": 19.0,
"eval_loss": 1.4545683860778809,
"eval_runtime": 169.7236,
"eval_samples_per_second": 21.559,
"eval_steps_per_second": 1.349,
"eval_wer": 0.9010359869138496,
"step": 3477
},
{
"epoch": 19.13,
"learning_rate": 0.00024952247191011235,
"loss": 1.3898,
"step": 3500
},
{
"epoch": 20.0,
"eval_loss": 1.5146596431732178,
"eval_runtime": 170.5824,
"eval_samples_per_second": 21.45,
"eval_steps_per_second": 1.342,
"eval_wer": 0.9201199563794984,
"step": 3660
},
{
"epoch": 21.0,
"eval_loss": 1.446706771850586,
"eval_runtime": 164.7146,
"eval_samples_per_second": 22.214,
"eval_steps_per_second": 1.39,
"eval_wer": 0.895856052344602,
"step": 3843
},
{
"epoch": 21.86,
"learning_rate": 0.00024111235955056177,
"loss": 1.1291,
"step": 4000
},
{
"epoch": 22.0,
"eval_loss": 1.474255084991455,
"eval_runtime": 166.7828,
"eval_samples_per_second": 21.939,
"eval_steps_per_second": 1.373,
"eval_wer": 0.9034896401308615,
"step": 4026
},
{
"epoch": 23.0,
"eval_loss": 1.3826948404312134,
"eval_runtime": 165.2382,
"eval_samples_per_second": 22.144,
"eval_steps_per_second": 1.386,
"eval_wer": 0.876226826608506,
"step": 4209
},
{
"epoch": 24.0,
"eval_loss": 1.3436840772628784,
"eval_runtime": 167.9623,
"eval_samples_per_second": 21.785,
"eval_steps_per_second": 1.363,
"eval_wer": 0.8792257360959651,
"step": 4392
},
{
"epoch": 24.59,
"learning_rate": 0.00023268539325842696,
"loss": 0.8993,
"step": 4500
},
{
"epoch": 25.0,
"eval_loss": 1.2894562482833862,
"eval_runtime": 179.0778,
"eval_samples_per_second": 20.432,
"eval_steps_per_second": 1.279,
"eval_wer": 0.8576881134133042,
"step": 4575
},
{
"epoch": 26.0,
"eval_loss": 1.2927522659301758,
"eval_runtime": 173.2382,
"eval_samples_per_second": 21.121,
"eval_steps_per_second": 1.322,
"eval_wer": 0.8557797164667393,
"step": 4758
},
{
"epoch": 27.0,
"eval_loss": 1.294653058052063,
"eval_runtime": 166.3679,
"eval_samples_per_second": 21.993,
"eval_steps_per_second": 1.376,
"eval_wer": 0.9163031624863686,
"step": 4941
},
{
"epoch": 27.32,
"learning_rate": 0.0002242584269662921,
"loss": 0.6298,
"step": 5000
},
{
"epoch": 28.0,
"eval_loss": 1.3150520324707031,
"eval_runtime": 164.8819,
"eval_samples_per_second": 22.192,
"eval_steps_per_second": 1.389,
"eval_wer": 0.873773173391494,
"step": 5124
},
{
"epoch": 29.0,
"eval_loss": 1.2971961498260498,
"eval_runtime": 163.7658,
"eval_samples_per_second": 22.343,
"eval_steps_per_second": 1.398,
"eval_wer": 0.8514176663031625,
"step": 5307
},
{
"epoch": 30.0,
"eval_loss": 1.302983283996582,
"eval_runtime": 162.8739,
"eval_samples_per_second": 22.465,
"eval_steps_per_second": 1.406,
"eval_wer": 0.8432388222464559,
"step": 5490
},
{
"epoch": 30.05,
"learning_rate": 0.00021583146067415728,
"loss": 0.4757,
"step": 5500
},
{
"epoch": 31.0,
"eval_loss": 1.3263603448867798,
"eval_runtime": 163.1422,
"eval_samples_per_second": 22.428,
"eval_steps_per_second": 1.404,
"eval_wer": 0.836423118865867,
"step": 5673
},
{
"epoch": 32.0,
"eval_loss": 1.3130974769592285,
"eval_runtime": 162.7813,
"eval_samples_per_second": 22.478,
"eval_steps_per_second": 1.407,
"eval_wer": 0.8421483097055616,
"step": 5856
},
{
"epoch": 32.79,
"learning_rate": 0.00020740449438202247,
"loss": 0.3735,
"step": 6000
},
{
"epoch": 33.0,
"eval_loss": 1.3457393646240234,
"eval_runtime": 164.771,
"eval_samples_per_second": 22.207,
"eval_steps_per_second": 1.39,
"eval_wer": 0.8587786259541985,
"step": 6039
},
{
"epoch": 34.0,
"eval_loss": 1.3450396060943604,
"eval_runtime": 163.6185,
"eval_samples_per_second": 22.363,
"eval_steps_per_second": 1.4,
"eval_wer": 0.8473282442748091,
"step": 6222
},
{
"epoch": 35.0,
"eval_loss": 1.3451658487319946,
"eval_runtime": 167.7067,
"eval_samples_per_second": 21.818,
"eval_steps_per_second": 1.365,
"eval_wer": 0.9217557251908397,
"step": 6405
},
{
"epoch": 35.52,
"learning_rate": 0.0001989775280898876,
"loss": 0.3253,
"step": 6500
},
{
"epoch": 36.0,
"eval_loss": 1.375409722328186,
"eval_runtime": 164.3009,
"eval_samples_per_second": 22.27,
"eval_steps_per_second": 1.394,
"eval_wer": 0.8396946564885496,
"step": 6588
},
{
"epoch": 37.0,
"eval_loss": 1.3554260730743408,
"eval_runtime": 163.3464,
"eval_samples_per_second": 22.4,
"eval_steps_per_second": 1.402,
"eval_wer": 0.8353326063249727,
"step": 6771
},
{
"epoch": 38.0,
"eval_loss": 1.353210687637329,
"eval_runtime": 166.4062,
"eval_samples_per_second": 21.988,
"eval_steps_per_second": 1.376,
"eval_wer": 0.8312431842966194,
"step": 6954
},
{
"epoch": 38.25,
"learning_rate": 0.0001905505617977528,
"loss": 0.2816,
"step": 7000
},
{
"epoch": 39.0,
"eval_loss": 1.369396686553955,
"eval_runtime": 165.9264,
"eval_samples_per_second": 22.052,
"eval_steps_per_second": 1.38,
"eval_wer": 0.8345147219193021,
"step": 7137
},
{
"epoch": 40.0,
"eval_loss": 1.395269751548767,
"eval_runtime": 163.891,
"eval_samples_per_second": 22.326,
"eval_steps_per_second": 1.397,
"eval_wer": 0.829607415485278,
"step": 7320
},
{
"epoch": 40.98,
"learning_rate": 0.00018212359550561795,
"loss": 0.2397,
"step": 7500
},
{
"epoch": 41.0,
"eval_loss": 1.385826826095581,
"eval_runtime": 165.4232,
"eval_samples_per_second": 22.119,
"eval_steps_per_second": 1.384,
"eval_wer": 0.8293347873500545,
"step": 7503
},
{
"epoch": 42.0,
"eval_loss": 1.3958967924118042,
"eval_runtime": 163.1457,
"eval_samples_per_second": 22.428,
"eval_steps_per_second": 1.404,
"eval_wer": 0.8402399127589967,
"step": 7686
},
{
"epoch": 43.0,
"eval_loss": 1.435033917427063,
"eval_runtime": 163.4766,
"eval_samples_per_second": 22.382,
"eval_steps_per_second": 1.401,
"eval_wer": 0.9318429661941112,
"step": 7869
},
{
"epoch": 43.72,
"learning_rate": 0.0001737134831460674,
"loss": 0.2084,
"step": 8000
},
{
"epoch": 44.0,
"eval_loss": 1.4003560543060303,
"eval_runtime": 164.4371,
"eval_samples_per_second": 22.252,
"eval_steps_per_second": 1.393,
"eval_wer": 0.8805888767720829,
"step": 8052
},
{
"epoch": 45.0,
"eval_loss": 1.387134075164795,
"eval_runtime": 164.8129,
"eval_samples_per_second": 22.201,
"eval_steps_per_second": 1.389,
"eval_wer": 0.8255179934569248,
"step": 8235
},
{
"epoch": 46.0,
"eval_loss": 1.4059827327728271,
"eval_runtime": 166.2017,
"eval_samples_per_second": 22.015,
"eval_steps_per_second": 1.378,
"eval_wer": 0.8252453653217012,
"step": 8418
},
{
"epoch": 46.45,
"learning_rate": 0.00016528651685393257,
"loss": 0.1853,
"step": 8500
},
{
"epoch": 47.0,
"eval_loss": 1.3992133140563965,
"eval_runtime": 164.2693,
"eval_samples_per_second": 22.274,
"eval_steps_per_second": 1.394,
"eval_wer": 0.8500545256270448,
"step": 8601
},
{
"epoch": 48.0,
"eval_loss": 1.4186208248138428,
"eval_runtime": 162.8365,
"eval_samples_per_second": 22.47,
"eval_steps_per_second": 1.406,
"eval_wer": 0.8252453653217012,
"step": 8784
},
{
"epoch": 49.0,
"eval_loss": 1.4120242595672607,
"eval_runtime": 164.9525,
"eval_samples_per_second": 22.182,
"eval_steps_per_second": 1.388,
"eval_wer": 0.8165212649945475,
"step": 8967
},
{
"epoch": 49.18,
"learning_rate": 0.00015685955056179775,
"loss": 0.1671,
"step": 9000
},
{
"epoch": 50.0,
"eval_loss": 1.4165652990341187,
"eval_runtime": 173.8761,
"eval_samples_per_second": 21.044,
"eval_steps_per_second": 1.317,
"eval_wer": 0.8214285714285714,
"step": 9150
},
{
"epoch": 51.0,
"eval_loss": 1.4411484003067017,
"eval_runtime": 165.9975,
"eval_samples_per_second": 22.043,
"eval_steps_per_second": 1.38,
"eval_wer": 0.8500545256270448,
"step": 9333
},
{
"epoch": 51.91,
"learning_rate": 0.00014843258426966292,
"loss": 0.1513,
"step": 9500
},
{
"epoch": 52.0,
"eval_loss": 1.4692339897155762,
"eval_runtime": 163.251,
"eval_samples_per_second": 22.413,
"eval_steps_per_second": 1.403,
"eval_wer": 0.839422028353326,
"step": 9516
},
{
"epoch": 53.0,
"eval_loss": 1.4640177488327026,
"eval_runtime": 162.0066,
"eval_samples_per_second": 22.586,
"eval_steps_per_second": 1.414,
"eval_wer": 0.8391494002181025,
"step": 9699
},
{
"epoch": 54.0,
"eval_loss": 1.450060486793518,
"eval_runtime": 165.9207,
"eval_samples_per_second": 22.053,
"eval_steps_per_second": 1.38,
"eval_wer": 0.8418756815703381,
"step": 9882
},
{
"epoch": 54.64,
"learning_rate": 0.00014002247191011234,
"loss": 0.133,
"step": 10000
},
{
"epoch": 55.0,
"eval_loss": 1.4133520126342773,
"eval_runtime": 163.1361,
"eval_samples_per_second": 22.429,
"eval_steps_per_second": 1.404,
"eval_wer": 0.8350599781897492,
"step": 10065
},
{
"epoch": 56.0,
"eval_loss": 1.4592992067337036,
"eval_runtime": 161.8579,
"eval_samples_per_second": 22.606,
"eval_steps_per_second": 1.415,
"eval_wer": 0.8405125408942202,
"step": 10248
},
{
"epoch": 57.0,
"eval_loss": 1.45597505569458,
"eval_runtime": 161.821,
"eval_samples_per_second": 22.611,
"eval_steps_per_second": 1.415,
"eval_wer": 0.8388767720828789,
"step": 10431
},
{
"epoch": 57.38,
"learning_rate": 0.00013159550561797753,
"loss": 0.1198,
"step": 10500
},
{
"epoch": 58.0,
"eval_loss": 1.4733554124832153,
"eval_runtime": 162.1949,
"eval_samples_per_second": 22.559,
"eval_steps_per_second": 1.412,
"eval_wer": 0.8334242093784079,
"step": 10614
},
{
"epoch": 59.0,
"eval_loss": 1.4649208784103394,
"eval_runtime": 162.1709,
"eval_samples_per_second": 22.563,
"eval_steps_per_second": 1.412,
"eval_wer": 0.8317884405670665,
"step": 10797
},
{
"epoch": 60.0,
"eval_loss": 1.4659123420715332,
"eval_runtime": 163.9506,
"eval_samples_per_second": 22.318,
"eval_steps_per_second": 1.397,
"eval_wer": 0.8099781897491821,
"step": 10980
},
{
"epoch": 60.11,
"learning_rate": 0.0001231685393258427,
"loss": 0.1109,
"step": 11000
},
{
"epoch": 61.0,
"eval_loss": 1.4783909320831299,
"eval_runtime": 165.1461,
"eval_samples_per_second": 22.156,
"eval_steps_per_second": 1.387,
"eval_wer": 0.811886586695747,
"step": 11163
},
{
"epoch": 62.0,
"eval_loss": 1.493830680847168,
"eval_runtime": 164.5944,
"eval_samples_per_second": 22.23,
"eval_steps_per_second": 1.391,
"eval_wer": 0.8148854961832062,
"step": 11346
},
{
"epoch": 62.84,
"learning_rate": 0.00011474157303370785,
"loss": 0.1063,
"step": 11500
},
{
"epoch": 63.0,
"eval_loss": 1.5050164461135864,
"eval_runtime": 163.3536,
"eval_samples_per_second": 22.399,
"eval_steps_per_second": 1.402,
"eval_wer": 0.8151581243184297,
"step": 11529
},
{
"epoch": 64.0,
"eval_loss": 1.4773460626602173,
"eval_runtime": 168.4977,
"eval_samples_per_second": 21.715,
"eval_steps_per_second": 1.359,
"eval_wer": 0.8176117775354417,
"step": 11712
},
{
"epoch": 65.0,
"eval_loss": 1.4835622310638428,
"eval_runtime": 166.8711,
"eval_samples_per_second": 21.927,
"eval_steps_per_second": 1.372,
"eval_wer": 0.8260632497273719,
"step": 11895
},
{
"epoch": 65.57,
"learning_rate": 0.00010631460674157301,
"loss": 0.0966,
"step": 12000
},
{
"epoch": 66.0,
"eval_loss": 1.4978560209274292,
"eval_runtime": 162.3032,
"eval_samples_per_second": 22.544,
"eval_steps_per_second": 1.411,
"eval_wer": 0.8157033805888768,
"step": 12078
},
{
"epoch": 67.0,
"eval_loss": 1.4603493213653564,
"eval_runtime": 162.8062,
"eval_samples_per_second": 22.475,
"eval_steps_per_second": 1.407,
"eval_wer": 0.8047982551799345,
"step": 12261
},
{
"epoch": 68.0,
"eval_loss": 1.4802541732788086,
"eval_runtime": 169.4775,
"eval_samples_per_second": 21.59,
"eval_steps_per_second": 1.351,
"eval_wer": 0.8127044711014176,
"step": 12444
},
{
"epoch": 68.31,
"learning_rate": 9.790449438202247e-05,
"loss": 0.0867,
"step": 12500
},
{
"epoch": 69.0,
"eval_loss": 1.4973595142364502,
"eval_runtime": 164.1372,
"eval_samples_per_second": 22.292,
"eval_steps_per_second": 1.395,
"eval_wer": 0.8129770992366412,
"step": 12627
},
{
"epoch": 70.0,
"eval_loss": 1.4721262454986572,
"eval_runtime": 163.536,
"eval_samples_per_second": 22.374,
"eval_steps_per_second": 1.4,
"eval_wer": 0.8077971646673937,
"step": 12810
},
{
"epoch": 71.0,
"eval_loss": 1.4643745422363281,
"eval_runtime": 166.8819,
"eval_samples_per_second": 21.926,
"eval_steps_per_second": 1.372,
"eval_wer": 0.819247546346783,
"step": 12993
},
{
"epoch": 71.04,
"learning_rate": 8.947752808988763e-05,
"loss": 0.0827,
"step": 13000
},
{
"epoch": 72.0,
"eval_loss": 1.4834694862365723,
"eval_runtime": 167.6508,
"eval_samples_per_second": 21.825,
"eval_steps_per_second": 1.366,
"eval_wer": 0.8137949836423118,
"step": 13176
},
{
"epoch": 73.0,
"eval_loss": 1.4933878183364868,
"eval_runtime": 170.3063,
"eval_samples_per_second": 21.485,
"eval_steps_per_second": 1.345,
"eval_wer": 0.8121592148309705,
"step": 13359
},
{
"epoch": 73.77,
"learning_rate": 8.10505617977528e-05,
"loss": 0.0734,
"step": 13500
},
{
"epoch": 74.0,
"eval_loss": 1.4950696229934692,
"eval_runtime": 175.7645,
"eval_samples_per_second": 20.818,
"eval_steps_per_second": 1.303,
"eval_wer": 0.8061613958560524,
"step": 13542
},
{
"epoch": 75.0,
"eval_loss": 1.490771770477295,
"eval_runtime": 168.0913,
"eval_samples_per_second": 21.768,
"eval_steps_per_second": 1.362,
"eval_wer": 0.806979280261723,
"step": 13725
},
{
"epoch": 76.0,
"eval_loss": 1.4876092672348022,
"eval_runtime": 166.128,
"eval_samples_per_second": 22.025,
"eval_steps_per_second": 1.378,
"eval_wer": 0.8124318429661941,
"step": 13908
},
{
"epoch": 76.5,
"learning_rate": 7.262359550561797e-05,
"loss": 0.0664,
"step": 14000
},
{
"epoch": 77.0,
"eval_loss": 1.493386149406433,
"eval_runtime": 166.8817,
"eval_samples_per_second": 21.926,
"eval_steps_per_second": 1.372,
"eval_wer": 0.8053435114503816,
"step": 14091
},
{
"epoch": 78.0,
"eval_loss": 1.4603490829467773,
"eval_runtime": 169.3203,
"eval_samples_per_second": 21.61,
"eval_steps_per_second": 1.352,
"eval_wer": 0.8047982551799345,
"step": 14274
},
{
"epoch": 79.0,
"eval_loss": 1.4732308387756348,
"eval_runtime": 165.2553,
"eval_samples_per_second": 22.142,
"eval_steps_per_second": 1.386,
"eval_wer": 0.8072519083969466,
"step": 14457
},
{
"epoch": 79.23,
"learning_rate": 6.42134831460674e-05,
"loss": 0.0602,
"step": 14500
},
{
"epoch": 80.0,
"eval_loss": 1.4924767017364502,
"eval_runtime": 166.2646,
"eval_samples_per_second": 22.007,
"eval_steps_per_second": 1.377,
"eval_wer": 0.8077971646673937,
"step": 14640
},
{
"epoch": 81.0,
"eval_loss": 1.4812291860580444,
"eval_runtime": 166.8878,
"eval_samples_per_second": 21.925,
"eval_steps_per_second": 1.372,
"eval_wer": 0.806434023991276,
"step": 14823
},
{
"epoch": 81.97,
"learning_rate": 5.578651685393258e-05,
"loss": 0.057,
"step": 15000
},
{
"epoch": 82.0,
"eval_loss": 1.4949839115142822,
"eval_runtime": 166.616,
"eval_samples_per_second": 21.961,
"eval_steps_per_second": 1.374,
"eval_wer": 0.8012540894220284,
"step": 15006
},
{
"epoch": 83.0,
"eval_loss": 1.4784878492355347,
"eval_runtime": 172.4918,
"eval_samples_per_second": 21.213,
"eval_steps_per_second": 1.328,
"eval_wer": 0.8056161395856052,
"step": 15189
},
{
"epoch": 84.0,
"eval_loss": 1.485625982284546,
"eval_runtime": 165.5579,
"eval_samples_per_second": 22.101,
"eval_steps_per_second": 1.383,
"eval_wer": 0.7993456924754635,
"step": 15372
},
{
"epoch": 84.7,
"learning_rate": 4.735955056179775e-05,
"loss": 0.0517,
"step": 15500
},
{
"epoch": 85.0,
"eval_loss": 1.4754849672317505,
"eval_runtime": 168.7909,
"eval_samples_per_second": 21.678,
"eval_steps_per_second": 1.357,
"eval_wer": 0.8034351145038168,
"step": 15555
},
{
"epoch": 86.0,
"eval_loss": 1.4813350439071655,
"eval_runtime": 168.272,
"eval_samples_per_second": 21.745,
"eval_steps_per_second": 1.361,
"eval_wer": 0.8034351145038168,
"step": 15738
},
{
"epoch": 87.0,
"eval_loss": 1.4965763092041016,
"eval_runtime": 166.9884,
"eval_samples_per_second": 21.912,
"eval_steps_per_second": 1.371,
"eval_wer": 0.8047982551799345,
"step": 15921
},
{
"epoch": 87.43,
"learning_rate": 3.893258426966292e-05,
"loss": 0.0468,
"step": 16000
},
{
"epoch": 88.0,
"eval_loss": 1.4883418083190918,
"eval_runtime": 166.1387,
"eval_samples_per_second": 22.024,
"eval_steps_per_second": 1.378,
"eval_wer": 0.8001635768811342,
"step": 16104
},
{
"epoch": 89.0,
"eval_loss": 1.4746148586273193,
"eval_runtime": 165.9654,
"eval_samples_per_second": 22.047,
"eval_steps_per_second": 1.38,
"eval_wer": 0.8023446019629226,
"step": 16287
},
{
"epoch": 90.0,
"eval_loss": 1.4697260856628418,
"eval_runtime": 166.9567,
"eval_samples_per_second": 21.916,
"eval_steps_per_second": 1.372,
"eval_wer": 0.7974372955288986,
"step": 16470
},
{
"epoch": 90.16,
"learning_rate": 3.0505617977528088e-05,
"loss": 0.0426,
"step": 16500
},
{
"epoch": 91.0,
"eval_loss": 1.4775140285491943,
"eval_runtime": 165.779,
"eval_samples_per_second": 22.072,
"eval_steps_per_second": 1.381,
"eval_wer": 0.8004362050163577,
"step": 16653
},
{
"epoch": 92.0,
"eval_loss": 1.4852207899093628,
"eval_runtime": 173.415,
"eval_samples_per_second": 21.1,
"eval_steps_per_second": 1.321,
"eval_wer": 0.8023446019629226,
"step": 16836
},
{
"epoch": 92.9,
"learning_rate": 2.2078651685393255e-05,
"loss": 0.0387,
"step": 17000
},
{
"epoch": 93.0,
"eval_loss": 1.4868098497390747,
"eval_runtime": 166.0768,
"eval_samples_per_second": 22.032,
"eval_steps_per_second": 1.379,
"eval_wer": 0.8004362050163577,
"step": 17019
},
{
"epoch": 94.0,
"eval_loss": 1.47845458984375,
"eval_runtime": 171.2193,
"eval_samples_per_second": 21.37,
"eval_steps_per_second": 1.337,
"eval_wer": 0.802071973827699,
"step": 17202
},
{
"epoch": 95.0,
"eval_loss": 1.4892385005950928,
"eval_runtime": 165.5064,
"eval_samples_per_second": 22.108,
"eval_steps_per_second": 1.384,
"eval_wer": 0.8015267175572519,
"step": 17385
},
{
"epoch": 95.63,
"learning_rate": 1.3668539325842695e-05,
"loss": 0.0359,
"step": 17500
},
{
"epoch": 96.0,
"eval_loss": 1.486182451248169,
"eval_runtime": 174.5056,
"eval_samples_per_second": 20.968,
"eval_steps_per_second": 1.312,
"eval_wer": 0.8017993456924755,
"step": 17568
},
{
"epoch": 97.0,
"eval_loss": 1.4851171970367432,
"eval_runtime": 165.0543,
"eval_samples_per_second": 22.168,
"eval_steps_per_second": 1.387,
"eval_wer": 0.8007088331515813,
"step": 17751
},
{
"epoch": 98.0,
"eval_loss": 1.48457932472229,
"eval_runtime": 169.1905,
"eval_samples_per_second": 21.627,
"eval_steps_per_second": 1.354,
"eval_wer": 0.7998909487459106,
"step": 17934
},
{
"epoch": 98.36,
"learning_rate": 5.241573033707864e-06,
"loss": 0.0347,
"step": 18000
},
{
"epoch": 99.0,
"eval_loss": 1.4852174520492554,
"eval_runtime": 168.792,
"eval_samples_per_second": 21.678,
"eval_steps_per_second": 1.357,
"eval_wer": 0.7993456924754635,
"step": 18117
},
{
"epoch": 100.0,
"eval_loss": 1.484755277633667,
"eval_runtime": 179.7891,
"eval_samples_per_second": 20.352,
"eval_steps_per_second": 1.274,
"eval_wer": 0.8004362050163577,
"step": 18300
},
{
"epoch": 100.0,
"step": 18300,
"total_flos": 1.3576650149787481e+20,
"train_loss": 0.34965579027686616,
"train_runtime": 66008.5183,
"train_samples_per_second": 17.704,
"train_steps_per_second": 0.277
}
],
"max_steps": 18300,
"num_train_epochs": 100,
"total_flos": 1.3576650149787481e+20,
"trial_name": null,
"trial_params": null
}