wav2vec2-xls-r-300m-zh-HK-v2 / trainer_state.json
w11wo's picture
End of training
5d161f0
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 99.99866131191432,
"global_step": 37300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.27,
"learning_rate": 4.85e-06,
"loss": 153.5094,
"step": 100
},
{
"epoch": 0.54,
"learning_rate": 9.85e-06,
"loss": 108.8648,
"step": 200
},
{
"epoch": 0.8,
"learning_rate": 1.48e-05,
"loss": 92.5714,
"step": 300
},
{
"epoch": 1.07,
"learning_rate": 1.9800000000000004e-05,
"loss": 79.9356,
"step": 400
},
{
"epoch": 1.34,
"learning_rate": 2.48e-05,
"loss": 69.8341,
"step": 500
},
{
"epoch": 1.34,
"eval_cer": 1.0,
"eval_loss": 80.07215118408203,
"eval_runtime": 130.8213,
"eval_samples_per_second": 17.597,
"eval_steps_per_second": 2.201,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 1.61,
"learning_rate": 2.98e-05,
"loss": 54.2478,
"step": 600
},
{
"epoch": 1.88,
"learning_rate": 3.48e-05,
"loss": 35.5793,
"step": 700
},
{
"epoch": 2.14,
"learning_rate": 3.9800000000000005e-05,
"loss": 17.7978,
"step": 800
},
{
"epoch": 2.41,
"learning_rate": 4.4800000000000005e-05,
"loss": 8.1204,
"step": 900
},
{
"epoch": 2.68,
"learning_rate": 4.9800000000000004e-05,
"loss": 6.6418,
"step": 1000
},
{
"epoch": 2.68,
"eval_cer": 1.0,
"eval_loss": 6.634645938873291,
"eval_runtime": 117.0489,
"eval_samples_per_second": 19.667,
"eval_steps_per_second": 2.461,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 2.95,
"learning_rate": 5.4800000000000004e-05,
"loss": 6.3633,
"step": 1100
},
{
"epoch": 3.22,
"learning_rate": 5.9800000000000003e-05,
"loss": 6.364,
"step": 1200
},
{
"epoch": 3.48,
"learning_rate": 6.48e-05,
"loss": 6.2461,
"step": 1300
},
{
"epoch": 3.75,
"learning_rate": 6.98e-05,
"loss": 6.2242,
"step": 1400
},
{
"epoch": 4.02,
"learning_rate": 7.48e-05,
"loss": 6.2419,
"step": 1500
},
{
"epoch": 4.02,
"eval_cer": 1.0,
"eval_loss": 6.290937423706055,
"eval_runtime": 117.0781,
"eval_samples_per_second": 19.662,
"eval_steps_per_second": 2.46,
"eval_wer": 1.0,
"step": 1500
},
{
"epoch": 4.29,
"learning_rate": 7.98e-05,
"loss": 6.1691,
"step": 1600
},
{
"epoch": 4.56,
"learning_rate": 8.48e-05,
"loss": 6.1668,
"step": 1700
},
{
"epoch": 4.82,
"learning_rate": 8.98e-05,
"loss": 6.1623,
"step": 1800
},
{
"epoch": 5.09,
"learning_rate": 9.48e-05,
"loss": 6.1884,
"step": 1900
},
{
"epoch": 5.36,
"learning_rate": 9.98e-05,
"loss": 6.0813,
"step": 2000
},
{
"epoch": 5.36,
"eval_cer": 1.0,
"eval_loss": 6.115033149719238,
"eval_runtime": 117.9948,
"eval_samples_per_second": 19.509,
"eval_steps_per_second": 2.441,
"eval_wer": 1.0,
"step": 2000
},
{
"epoch": 5.63,
"learning_rate": 9.972804532577904e-05,
"loss": 6.1027,
"step": 2100
},
{
"epoch": 5.9,
"learning_rate": 9.944475920679887e-05,
"loss": 6.0586,
"step": 2200
},
{
"epoch": 6.17,
"learning_rate": 9.91614730878187e-05,
"loss": 6.0399,
"step": 2300
},
{
"epoch": 6.43,
"learning_rate": 9.887818696883852e-05,
"loss": 6.0035,
"step": 2400
},
{
"epoch": 6.7,
"learning_rate": 9.859490084985836e-05,
"loss": 5.9677,
"step": 2500
},
{
"epoch": 6.7,
"eval_cer": 1.002818566168114,
"eval_loss": 6.030123233795166,
"eval_runtime": 116.8078,
"eval_samples_per_second": 19.708,
"eval_steps_per_second": 2.466,
"eval_wer": 1.1385881333910783,
"step": 2500
},
{
"epoch": 6.97,
"learning_rate": 9.831161473087818e-05,
"loss": 5.9617,
"step": 2600
},
{
"epoch": 7.24,
"learning_rate": 9.802832861189802e-05,
"loss": 5.9736,
"step": 2700
},
{
"epoch": 7.51,
"learning_rate": 9.774504249291784e-05,
"loss": 5.9098,
"step": 2800
},
{
"epoch": 7.77,
"learning_rate": 9.746175637393768e-05,
"loss": 5.9069,
"step": 2900
},
{
"epoch": 8.04,
"learning_rate": 9.717847025495752e-05,
"loss": 5.9296,
"step": 3000
},
{
"epoch": 8.04,
"eval_cer": 1.0057735145701687,
"eval_loss": 5.897457599639893,
"eval_runtime": 116.427,
"eval_samples_per_second": 19.772,
"eval_steps_per_second": 2.474,
"eval_wer": 1.2113469034213946,
"step": 3000
},
{
"epoch": 8.31,
"learning_rate": 9.689518413597734e-05,
"loss": 5.8213,
"step": 3100
},
{
"epoch": 8.58,
"learning_rate": 9.661189801699718e-05,
"loss": 5.8241,
"step": 3200
},
{
"epoch": 8.85,
"learning_rate": 9.632861189801701e-05,
"loss": 5.7787,
"step": 3300
},
{
"epoch": 9.12,
"learning_rate": 9.604532577903684e-05,
"loss": 5.7529,
"step": 3400
},
{
"epoch": 9.38,
"learning_rate": 9.576203966005666e-05,
"loss": 5.6434,
"step": 3500
},
{
"epoch": 9.38,
"eval_cer": 1.017093239987271,
"eval_loss": 5.540365219116211,
"eval_runtime": 116.7079,
"eval_samples_per_second": 19.724,
"eval_steps_per_second": 2.468,
"eval_wer": 2.16240796881767,
"step": 3500
},
{
"epoch": 9.65,
"learning_rate": 9.54787535410765e-05,
"loss": 5.6259,
"step": 3600
},
{
"epoch": 9.92,
"learning_rate": 9.519546742209632e-05,
"loss": 5.5488,
"step": 3700
},
{
"epoch": 10.19,
"learning_rate": 9.491218130311616e-05,
"loss": 5.5068,
"step": 3800
},
{
"epoch": 10.46,
"learning_rate": 9.462889518413598e-05,
"loss": 5.3439,
"step": 3900
},
{
"epoch": 10.72,
"learning_rate": 9.434560906515582e-05,
"loss": 5.1974,
"step": 4000
},
{
"epoch": 10.72,
"eval_cer": 0.9365822612174387,
"eval_loss": 4.543997287750244,
"eval_runtime": 115.8257,
"eval_samples_per_second": 19.875,
"eval_steps_per_second": 2.486,
"eval_wer": 2.170203551320918,
"step": 4000
},
{
"epoch": 10.99,
"learning_rate": 9.406232294617564e-05,
"loss": 4.8692,
"step": 4100
},
{
"epoch": 11.26,
"learning_rate": 9.377903682719548e-05,
"loss": 4.7155,
"step": 4200
},
{
"epoch": 11.53,
"learning_rate": 9.34957507082153e-05,
"loss": 4.4978,
"step": 4300
},
{
"epoch": 11.8,
"learning_rate": 9.321246458923513e-05,
"loss": 4.4105,
"step": 4400
},
{
"epoch": 12.06,
"learning_rate": 9.292917847025496e-05,
"loss": 4.3601,
"step": 4500
},
{
"epoch": 12.06,
"eval_cer": 0.8998045187980179,
"eval_loss": 3.383898973464966,
"eval_runtime": 117.0791,
"eval_samples_per_second": 19.662,
"eval_steps_per_second": 2.46,
"eval_wer": 2.246427024686011,
"step": 4500
},
{
"epoch": 12.33,
"learning_rate": 9.264589235127479e-05,
"loss": 4.1745,
"step": 4600
},
{
"epoch": 12.6,
"learning_rate": 9.236260623229462e-05,
"loss": 4.1194,
"step": 4700
},
{
"epoch": 12.87,
"learning_rate": 9.207932011331445e-05,
"loss": 4.0809,
"step": 4800
},
{
"epoch": 13.14,
"learning_rate": 9.179603399433428e-05,
"loss": 3.9556,
"step": 4900
},
{
"epoch": 13.4,
"learning_rate": 9.151558073654392e-05,
"loss": 3.9321,
"step": 5000
},
{
"epoch": 13.4,
"eval_cer": 0.8400236395872165,
"eval_loss": 2.8784573078155518,
"eval_runtime": 117.9053,
"eval_samples_per_second": 19.524,
"eval_steps_per_second": 2.443,
"eval_wer": 2.3096578605456908,
"step": 5000
},
{
"epoch": 13.67,
"learning_rate": 9.123229461756374e-05,
"loss": 3.8826,
"step": 5100
},
{
"epoch": 13.94,
"learning_rate": 9.094900849858358e-05,
"loss": 3.7975,
"step": 5200
},
{
"epoch": 14.21,
"learning_rate": 9.06657223796034e-05,
"loss": 3.7704,
"step": 5300
},
{
"epoch": 14.48,
"learning_rate": 9.038243626062324e-05,
"loss": 3.6848,
"step": 5400
},
{
"epoch": 14.74,
"learning_rate": 9.009915014164306e-05,
"loss": 3.6462,
"step": 5500
},
{
"epoch": 14.74,
"eval_cer": 0.6663181342910397,
"eval_loss": 2.510770797729492,
"eval_runtime": 116.5338,
"eval_samples_per_second": 19.754,
"eval_steps_per_second": 2.471,
"eval_wer": 1.9623213512343005,
"step": 5500
},
{
"epoch": 15.01,
"learning_rate": 8.98158640226629e-05,
"loss": 3.584,
"step": 5600
},
{
"epoch": 15.28,
"learning_rate": 8.953257790368272e-05,
"loss": 3.531,
"step": 5700
},
{
"epoch": 15.55,
"learning_rate": 8.925212464589235e-05,
"loss": 3.5509,
"step": 5800
},
{
"epoch": 15.82,
"learning_rate": 8.896883852691219e-05,
"loss": 3.524,
"step": 5900
},
{
"epoch": 16.09,
"learning_rate": 8.868555240793201e-05,
"loss": 3.5156,
"step": 6000
},
{
"epoch": 16.09,
"eval_cer": 0.5705778060644633,
"eval_loss": 2.2789571285247803,
"eval_runtime": 115.731,
"eval_samples_per_second": 19.891,
"eval_steps_per_second": 2.489,
"eval_wer": 1.6478995236032914,
"step": 6000
},
{
"epoch": 16.35,
"learning_rate": 8.840226628895184e-05,
"loss": 3.4294,
"step": 6100
},
{
"epoch": 16.62,
"learning_rate": 8.811898016997168e-05,
"loss": 3.4156,
"step": 6200
},
{
"epoch": 16.89,
"learning_rate": 8.78356940509915e-05,
"loss": 3.3933,
"step": 6300
},
{
"epoch": 17.16,
"learning_rate": 8.755240793201134e-05,
"loss": 3.3293,
"step": 6400
},
{
"epoch": 17.43,
"learning_rate": 8.726912181303116e-05,
"loss": 3.32,
"step": 6500
},
{
"epoch": 17.43,
"eval_cer": 0.6244033277265082,
"eval_loss": 2.1449646949768066,
"eval_runtime": 119.2852,
"eval_samples_per_second": 19.298,
"eval_steps_per_second": 2.414,
"eval_wer": 1.833694239930706,
"step": 6500
},
{
"epoch": 17.69,
"learning_rate": 8.6985835694051e-05,
"loss": 3.3019,
"step": 6600
},
{
"epoch": 17.96,
"learning_rate": 8.670538243626063e-05,
"loss": 3.3058,
"step": 6700
},
{
"epoch": 18.23,
"learning_rate": 8.642209631728045e-05,
"loss": 3.289,
"step": 6800
},
{
"epoch": 18.5,
"learning_rate": 8.613881019830029e-05,
"loss": 3.208,
"step": 6900
},
{
"epoch": 18.77,
"learning_rate": 8.585552407932011e-05,
"loss": 3.1918,
"step": 7000
},
{
"epoch": 18.77,
"eval_cer": 0.6017184161476565,
"eval_loss": 1.8536365032196045,
"eval_runtime": 116.4854,
"eval_samples_per_second": 19.762,
"eval_steps_per_second": 2.472,
"eval_wer": 1.939367691641403,
"step": 7000
},
{
"epoch": 19.03,
"learning_rate": 8.557223796033995e-05,
"loss": 3.1877,
"step": 7100
},
{
"epoch": 19.3,
"learning_rate": 8.528895184135977e-05,
"loss": 3.0893,
"step": 7200
},
{
"epoch": 19.57,
"learning_rate": 8.500566572237961e-05,
"loss": 3.102,
"step": 7300
},
{
"epoch": 19.84,
"learning_rate": 8.472521246458924e-05,
"loss": 3.0536,
"step": 7400
},
{
"epoch": 20.11,
"learning_rate": 8.444192634560907e-05,
"loss": 3.1139,
"step": 7500
},
{
"epoch": 20.11,
"eval_cer": 0.5638496158567078,
"eval_loss": 1.7204933166503906,
"eval_runtime": 116.1457,
"eval_samples_per_second": 19.82,
"eval_steps_per_second": 2.48,
"eval_wer": 1.9112169770463403,
"step": 7500
},
{
"epoch": 20.37,
"learning_rate": 8.41586402266289e-05,
"loss": 2.9958,
"step": 7600
},
{
"epoch": 20.64,
"learning_rate": 8.387535410764873e-05,
"loss": 3.0055,
"step": 7700
},
{
"epoch": 20.91,
"learning_rate": 8.359206798866855e-05,
"loss": 2.9673,
"step": 7800
},
{
"epoch": 21.18,
"learning_rate": 8.330878186968839e-05,
"loss": 2.9276,
"step": 7900
},
{
"epoch": 21.45,
"learning_rate": 8.302549575070821e-05,
"loss": 2.8995,
"step": 8000
},
{
"epoch": 21.45,
"eval_cer": 0.3250443242260308,
"eval_loss": 1.5478395223617554,
"eval_runtime": 120.8897,
"eval_samples_per_second": 19.042,
"eval_steps_per_second": 2.382,
"eval_wer": 1.0623646600259853,
"step": 8000
},
{
"epoch": 21.71,
"learning_rate": 8.274220963172805e-05,
"loss": 2.8602,
"step": 8100
},
{
"epoch": 21.98,
"learning_rate": 8.245892351274787e-05,
"loss": 2.877,
"step": 8200
},
{
"epoch": 22.25,
"learning_rate": 8.217563739376771e-05,
"loss": 2.8283,
"step": 8300
},
{
"epoch": 22.52,
"learning_rate": 8.189235127478753e-05,
"loss": 2.7887,
"step": 8400
},
{
"epoch": 22.79,
"learning_rate": 8.160906515580737e-05,
"loss": 2.7572,
"step": 8500
},
{
"epoch": 22.79,
"eval_cer": 0.33668227485566216,
"eval_loss": 1.406813144683838,
"eval_runtime": 117.2331,
"eval_samples_per_second": 19.636,
"eval_steps_per_second": 2.457,
"eval_wer": 1.141186660892161,
"step": 8500
},
{
"epoch": 23.06,
"learning_rate": 8.132577903682719e-05,
"loss": 2.7576,
"step": 8600
},
{
"epoch": 23.32,
"learning_rate": 8.104249291784703e-05,
"loss": 2.7336,
"step": 8700
},
{
"epoch": 23.59,
"learning_rate": 8.075920679886687e-05,
"loss": 2.6792,
"step": 8800
},
{
"epoch": 23.86,
"learning_rate": 8.047592067988669e-05,
"loss": 2.6983,
"step": 8900
},
{
"epoch": 24.13,
"learning_rate": 8.019263456090653e-05,
"loss": 2.6881,
"step": 9000
},
{
"epoch": 24.13,
"eval_cer": 0.5683047688321134,
"eval_loss": 1.3311798572540283,
"eval_runtime": 116.7773,
"eval_samples_per_second": 19.713,
"eval_steps_per_second": 2.466,
"eval_wer": 2.009961022087484,
"step": 9000
},
{
"epoch": 24.4,
"learning_rate": 7.990934844192635e-05,
"loss": 2.6439,
"step": 9100
},
{
"epoch": 24.66,
"learning_rate": 7.962606232294619e-05,
"loss": 2.6563,
"step": 9200
},
{
"epoch": 24.93,
"learning_rate": 7.934277620396601e-05,
"loss": 2.6792,
"step": 9300
},
{
"epoch": 25.2,
"learning_rate": 7.905949008498585e-05,
"loss": 2.6905,
"step": 9400
},
{
"epoch": 25.47,
"learning_rate": 7.877620396600567e-05,
"loss": 2.5993,
"step": 9500
},
{
"epoch": 25.47,
"eval_cer": 0.6450425057962449,
"eval_loss": 1.2552706003189087,
"eval_runtime": 117.844,
"eval_samples_per_second": 19.534,
"eval_steps_per_second": 2.444,
"eval_wer": 2.003897791251624,
"step": 9500
},
{
"epoch": 25.74,
"learning_rate": 7.849291784702551e-05,
"loss": 2.6243,
"step": 9600
},
{
"epoch": 26.01,
"learning_rate": 7.820963172804533e-05,
"loss": 2.5753,
"step": 9700
},
{
"epoch": 26.27,
"learning_rate": 7.792634560906516e-05,
"loss": 2.521,
"step": 9800
},
{
"epoch": 26.54,
"learning_rate": 7.76458923512748e-05,
"loss": 2.5546,
"step": 9900
},
{
"epoch": 26.81,
"learning_rate": 7.736260623229463e-05,
"loss": 2.5304,
"step": 10000
},
{
"epoch": 26.81,
"eval_cer": 0.5788971223348638,
"eval_loss": 1.242166519165039,
"eval_runtime": 116.3994,
"eval_samples_per_second": 19.777,
"eval_steps_per_second": 2.474,
"eval_wer": 2.039411000433088,
"step": 10000
},
{
"epoch": 27.08,
"learning_rate": 7.707932011331445e-05,
"loss": 2.5599,
"step": 10100
},
{
"epoch": 27.35,
"learning_rate": 7.679603399433429e-05,
"loss": 2.4878,
"step": 10200
},
{
"epoch": 27.61,
"learning_rate": 7.651274787535411e-05,
"loss": 2.4684,
"step": 10300
},
{
"epoch": 27.88,
"learning_rate": 7.622946175637395e-05,
"loss": 2.4647,
"step": 10400
},
{
"epoch": 28.15,
"learning_rate": 7.594617563739377e-05,
"loss": 2.4352,
"step": 10500
},
{
"epoch": 28.15,
"eval_cer": 0.5506659999090785,
"eval_loss": 1.1581844091415405,
"eval_runtime": 116.2358,
"eval_samples_per_second": 19.805,
"eval_steps_per_second": 2.478,
"eval_wer": 1.9969683845820703,
"step": 10500
},
{
"epoch": 28.42,
"learning_rate": 7.56628895184136e-05,
"loss": 2.4437,
"step": 10600
},
{
"epoch": 28.69,
"learning_rate": 7.537960339943343e-05,
"loss": 2.44,
"step": 10700
},
{
"epoch": 28.95,
"learning_rate": 7.509631728045327e-05,
"loss": 2.447,
"step": 10800
},
{
"epoch": 29.22,
"learning_rate": 7.481303116147309e-05,
"loss": 2.4203,
"step": 10900
},
{
"epoch": 29.49,
"learning_rate": 7.452974504249293e-05,
"loss": 2.3795,
"step": 11000
},
{
"epoch": 29.49,
"eval_cer": 0.4843842342137564,
"eval_loss": 1.1159536838531494,
"eval_runtime": 117.3261,
"eval_samples_per_second": 19.621,
"eval_steps_per_second": 2.455,
"eval_wer": 1.8254655695106106,
"step": 11000
},
{
"epoch": 29.76,
"learning_rate": 7.424645892351275e-05,
"loss": 2.3967,
"step": 11100
},
{
"epoch": 30.03,
"learning_rate": 7.396317280453257e-05,
"loss": 2.3546,
"step": 11200
},
{
"epoch": 30.29,
"learning_rate": 7.367988668555241e-05,
"loss": 2.343,
"step": 11300
},
{
"epoch": 30.56,
"learning_rate": 7.339660056657224e-05,
"loss": 2.3377,
"step": 11400
},
{
"epoch": 30.83,
"learning_rate": 7.311331444759207e-05,
"loss": 2.3287,
"step": 11500
},
{
"epoch": 30.83,
"eval_cer": 0.3780060917397827,
"eval_loss": 1.0775071382522583,
"eval_runtime": 118.2979,
"eval_samples_per_second": 19.459,
"eval_steps_per_second": 2.435,
"eval_wer": 1.4122996968384582,
"step": 11500
},
{
"epoch": 31.1,
"learning_rate": 7.28300283286119e-05,
"loss": 2.341,
"step": 11600
},
{
"epoch": 31.37,
"learning_rate": 7.254674220963173e-05,
"loss": 2.3039,
"step": 11700
},
{
"epoch": 31.63,
"learning_rate": 7.226345609065156e-05,
"loss": 2.2769,
"step": 11800
},
{
"epoch": 31.9,
"learning_rate": 7.198300283286119e-05,
"loss": 2.323,
"step": 11900
},
{
"epoch": 32.17,
"learning_rate": 7.169971671388103e-05,
"loss": 2.2622,
"step": 12000
},
{
"epoch": 32.17,
"eval_cer": 0.48938491612492613,
"eval_loss": 1.0703905820846558,
"eval_runtime": 116.0515,
"eval_samples_per_second": 19.836,
"eval_steps_per_second": 2.482,
"eval_wer": 1.7444781290601992,
"step": 12000
},
{
"epoch": 32.44,
"learning_rate": 7.141643059490085e-05,
"loss": 2.2663,
"step": 12100
},
{
"epoch": 32.71,
"learning_rate": 7.113314447592069e-05,
"loss": 2.2797,
"step": 12200
},
{
"epoch": 32.97,
"learning_rate": 7.084985835694051e-05,
"loss": 2.264,
"step": 12300
},
{
"epoch": 33.24,
"learning_rate": 7.056657223796033e-05,
"loss": 2.2497,
"step": 12400
},
{
"epoch": 33.51,
"learning_rate": 7.028328611898017e-05,
"loss": 2.2225,
"step": 12500
},
{
"epoch": 33.51,
"eval_cer": 0.5057962449424922,
"eval_loss": 1.0272445678710938,
"eval_runtime": 118.2938,
"eval_samples_per_second": 19.46,
"eval_steps_per_second": 2.435,
"eval_wer": 1.7236899090515374,
"step": 12500
},
{
"epoch": 33.78,
"learning_rate": 7e-05,
"loss": 2.2025,
"step": 12600
},
{
"epoch": 34.05,
"learning_rate": 6.971671388101983e-05,
"loss": 2.1892,
"step": 12700
},
{
"epoch": 34.32,
"learning_rate": 6.943342776203965e-05,
"loss": 2.1498,
"step": 12800
},
{
"epoch": 34.58,
"learning_rate": 6.915014164305949e-05,
"loss": 2.1819,
"step": 12900
},
{
"epoch": 34.85,
"learning_rate": 6.886685552407931e-05,
"loss": 2.1843,
"step": 13000
},
{
"epoch": 34.85,
"eval_cer": 0.5028412965404373,
"eval_loss": 0.9756352305412292,
"eval_runtime": 117.2229,
"eval_samples_per_second": 19.638,
"eval_steps_per_second": 2.457,
"eval_wer": 1.8042442615851018,
"step": 13000
},
{
"epoch": 35.12,
"learning_rate": 6.858356940509915e-05,
"loss": 2.1578,
"step": 13100
},
{
"epoch": 35.39,
"learning_rate": 6.830028328611899e-05,
"loss": 2.1083,
"step": 13200
},
{
"epoch": 35.66,
"learning_rate": 6.801699716713881e-05,
"loss": 2.1531,
"step": 13300
},
{
"epoch": 35.92,
"learning_rate": 6.773371104815865e-05,
"loss": 2.11,
"step": 13400
},
{
"epoch": 36.19,
"learning_rate": 6.745042492917847e-05,
"loss": 2.1,
"step": 13500
},
{
"epoch": 36.19,
"eval_cer": 0.6055371186980043,
"eval_loss": 0.9526697993278503,
"eval_runtime": 118.3448,
"eval_samples_per_second": 19.452,
"eval_steps_per_second": 2.434,
"eval_wer": 1.8908618449545258,
"step": 13500
},
{
"epoch": 36.46,
"learning_rate": 6.716713881019831e-05,
"loss": 2.0948,
"step": 13600
},
{
"epoch": 36.73,
"learning_rate": 6.688385269121813e-05,
"loss": 2.071,
"step": 13700
},
{
"epoch": 37.0,
"learning_rate": 6.660056657223797e-05,
"loss": 2.1179,
"step": 13800
},
{
"epoch": 37.27,
"learning_rate": 6.63172804532578e-05,
"loss": 2.0444,
"step": 13900
},
{
"epoch": 37.53,
"learning_rate": 6.603399433427763e-05,
"loss": 2.0741,
"step": 14000
},
{
"epoch": 37.53,
"eval_cer": 0.5880347320089103,
"eval_loss": 0.941799521446228,
"eval_runtime": 117.0385,
"eval_samples_per_second": 19.669,
"eval_steps_per_second": 2.461,
"eval_wer": 1.902555218709398,
"step": 14000
},
{
"epoch": 37.8,
"learning_rate": 6.575070821529745e-05,
"loss": 2.0937,
"step": 14100
},
{
"epoch": 38.07,
"learning_rate": 6.546742209631729e-05,
"loss": 2.0848,
"step": 14200
},
{
"epoch": 38.34,
"learning_rate": 6.518413597733712e-05,
"loss": 2.0235,
"step": 14300
},
{
"epoch": 38.61,
"learning_rate": 6.490084985835695e-05,
"loss": 2.0165,
"step": 14400
},
{
"epoch": 38.87,
"learning_rate": 6.461756373937678e-05,
"loss": 2.0179,
"step": 14500
},
{
"epoch": 38.87,
"eval_cer": 0.5245715324817021,
"eval_loss": 0.93625807762146,
"eval_runtime": 117.33,
"eval_samples_per_second": 19.62,
"eval_steps_per_second": 2.455,
"eval_wer": 1.797747942832395,
"step": 14500
},
{
"epoch": 39.14,
"learning_rate": 6.43342776203966e-05,
"loss": 1.9771,
"step": 14600
},
{
"epoch": 39.41,
"learning_rate": 6.405099150141644e-05,
"loss": 1.9721,
"step": 14700
},
{
"epoch": 39.68,
"learning_rate": 6.376770538243626e-05,
"loss": 2.0099,
"step": 14800
},
{
"epoch": 39.95,
"learning_rate": 6.34844192634561e-05,
"loss": 2.0237,
"step": 14900
},
{
"epoch": 40.21,
"learning_rate": 6.320113314447592e-05,
"loss": 2.0615,
"step": 15000
},
{
"epoch": 40.21,
"eval_cer": 0.5598945310724189,
"eval_loss": 0.9634870886802673,
"eval_runtime": 118.3611,
"eval_samples_per_second": 19.449,
"eval_steps_per_second": 2.433,
"eval_wer": 1.8111736682546558,
"step": 15000
},
{
"epoch": 40.48,
"learning_rate": 6.291784702549576e-05,
"loss": 1.9647,
"step": 15100
},
{
"epoch": 40.75,
"learning_rate": 6.263456090651558e-05,
"loss": 1.9683,
"step": 15200
},
{
"epoch": 41.02,
"learning_rate": 6.235127478753542e-05,
"loss": 1.9311,
"step": 15300
},
{
"epoch": 41.29,
"learning_rate": 6.206798866855524e-05,
"loss": 1.9126,
"step": 15400
},
{
"epoch": 41.55,
"learning_rate": 6.178470254957506e-05,
"loss": 1.9448,
"step": 15500
},
{
"epoch": 41.55,
"eval_cer": 0.491430649634041,
"eval_loss": 0.9248816967010498,
"eval_runtime": 116.7415,
"eval_samples_per_second": 19.719,
"eval_steps_per_second": 2.467,
"eval_wer": 1.7249891728020788,
"step": 15500
},
{
"epoch": 41.82,
"learning_rate": 6.15014164305949e-05,
"loss": 1.934,
"step": 15600
},
{
"epoch": 42.09,
"learning_rate": 6.121813031161473e-05,
"loss": 2.0163,
"step": 15700
},
{
"epoch": 42.36,
"learning_rate": 6.093484419263457e-05,
"loss": 1.8725,
"step": 15800
},
{
"epoch": 42.63,
"learning_rate": 6.065155807365439e-05,
"loss": 1.9246,
"step": 15900
},
{
"epoch": 42.89,
"learning_rate": 6.036827195467423e-05,
"loss": 1.8966,
"step": 16000
},
{
"epoch": 42.89,
"eval_cer": 0.4318770741464745,
"eval_loss": 0.9022775888442993,
"eval_runtime": 116.8996,
"eval_samples_per_second": 19.692,
"eval_steps_per_second": 2.464,
"eval_wer": 1.5829363360762234,
"step": 16000
},
{
"epoch": 43.16,
"learning_rate": 6.008498583569405e-05,
"loss": 1.8316,
"step": 16100
},
{
"epoch": 43.43,
"learning_rate": 5.9804532577903686e-05,
"loss": 1.8786,
"step": 16200
},
{
"epoch": 43.7,
"learning_rate": 5.9521246458923516e-05,
"loss": 1.84,
"step": 16300
},
{
"epoch": 43.97,
"learning_rate": 5.9237960339943346e-05,
"loss": 1.875,
"step": 16400
},
{
"epoch": 44.24,
"learning_rate": 5.895750708215297e-05,
"loss": 1.8662,
"step": 16500
},
{
"epoch": 44.24,
"eval_cer": 0.42301222894031004,
"eval_loss": 0.9001737236976624,
"eval_runtime": 118.0444,
"eval_samples_per_second": 19.501,
"eval_steps_per_second": 2.44,
"eval_wer": 1.483326115201386,
"step": 16500
},
{
"epoch": 44.5,
"learning_rate": 5.867422096317281e-05,
"loss": 1.8645,
"step": 16600
},
{
"epoch": 44.77,
"learning_rate": 5.839093484419263e-05,
"loss": 1.8243,
"step": 16700
},
{
"epoch": 45.04,
"learning_rate": 5.810764872521247e-05,
"loss": 1.7991,
"step": 16800
},
{
"epoch": 45.31,
"learning_rate": 5.78243626062323e-05,
"loss": 1.7956,
"step": 16900
},
{
"epoch": 45.58,
"learning_rate": 5.754107648725213e-05,
"loss": 1.8136,
"step": 17000
},
{
"epoch": 45.58,
"eval_cer": 0.2986770923307724,
"eval_loss": 0.9075531959533691,
"eval_runtime": 118.7643,
"eval_samples_per_second": 19.383,
"eval_steps_per_second": 2.425,
"eval_wer": 1.1827631009094846,
"step": 17000
},
{
"epoch": 45.84,
"learning_rate": 5.725779036827196e-05,
"loss": 1.8,
"step": 17100
},
{
"epoch": 46.11,
"learning_rate": 5.6974504249291784e-05,
"loss": 1.8339,
"step": 17200
},
{
"epoch": 46.38,
"learning_rate": 5.669121813031162e-05,
"loss": 1.7869,
"step": 17300
},
{
"epoch": 46.65,
"learning_rate": 5.6407932011331444e-05,
"loss": 1.8145,
"step": 17400
},
{
"epoch": 46.92,
"learning_rate": 5.612464589235128e-05,
"loss": 1.7908,
"step": 17500
},
{
"epoch": 46.92,
"eval_cer": 0.42578533436377686,
"eval_loss": 0.8774313926696777,
"eval_runtime": 118.9119,
"eval_samples_per_second": 19.359,
"eval_steps_per_second": 2.422,
"eval_wer": 1.577306193157211,
"step": 17500
},
{
"epoch": 47.18,
"learning_rate": 5.5841359773371105e-05,
"loss": 1.7488,
"step": 17600
},
{
"epoch": 47.45,
"learning_rate": 5.555807365439094e-05,
"loss": 1.7289,
"step": 17700
},
{
"epoch": 47.72,
"learning_rate": 5.5274787535410765e-05,
"loss": 1.7722,
"step": 17800
},
{
"epoch": 47.99,
"learning_rate": 5.49915014164306e-05,
"loss": 1.7659,
"step": 17900
},
{
"epoch": 48.26,
"learning_rate": 5.4708215297450426e-05,
"loss": 1.7354,
"step": 18000
},
{
"epoch": 48.26,
"eval_cer": 0.40241851161522024,
"eval_loss": 0.8727295398712158,
"eval_runtime": 117.7378,
"eval_samples_per_second": 19.552,
"eval_steps_per_second": 2.446,
"eval_wer": 1.5036812472932006,
"step": 18000
},
{
"epoch": 48.52,
"learning_rate": 5.442492917847026e-05,
"loss": 1.7538,
"step": 18100
},
{
"epoch": 48.79,
"learning_rate": 5.4141643059490086e-05,
"loss": 1.7304,
"step": 18200
},
{
"epoch": 49.06,
"learning_rate": 5.385835694050991e-05,
"loss": 1.7194,
"step": 18300
},
{
"epoch": 49.33,
"learning_rate": 5.357507082152975e-05,
"loss": 1.6824,
"step": 18400
},
{
"epoch": 49.6,
"learning_rate": 5.329178470254958e-05,
"loss": 1.6739,
"step": 18500
},
{
"epoch": 49.6,
"eval_cer": 0.27890166840932856,
"eval_loss": 0.8635693788528442,
"eval_runtime": 118.5578,
"eval_samples_per_second": 19.417,
"eval_steps_per_second": 2.429,
"eval_wer": 1.1238631442182763,
"step": 18500
},
{
"epoch": 49.86,
"learning_rate": 5.300849858356941e-05,
"loss": 1.6807,
"step": 18600
},
{
"epoch": 50.13,
"learning_rate": 5.272521246458924e-05,
"loss": 1.6651,
"step": 18700
},
{
"epoch": 50.4,
"learning_rate": 5.2441926345609075e-05,
"loss": 1.7008,
"step": 18800
},
{
"epoch": 50.67,
"learning_rate": 5.21586402266289e-05,
"loss": 1.6183,
"step": 18900
},
{
"epoch": 50.94,
"learning_rate": 5.1875354107648735e-05,
"loss": 1.6457,
"step": 19000
},
{
"epoch": 50.94,
"eval_cer": 0.3103605037050507,
"eval_loss": 0.8516315221786499,
"eval_runtime": 117.1957,
"eval_samples_per_second": 19.642,
"eval_steps_per_second": 2.457,
"eval_wer": 1.2269380684278908,
"step": 19000
},
{
"epoch": 51.21,
"learning_rate": 5.159206798866856e-05,
"loss": 1.6506,
"step": 19100
},
{
"epoch": 51.47,
"learning_rate": 5.130878186968838e-05,
"loss": 1.6612,
"step": 19200
},
{
"epoch": 51.74,
"learning_rate": 5.102549575070822e-05,
"loss": 1.6339,
"step": 19300
},
{
"epoch": 52.01,
"learning_rate": 5.074220963172804e-05,
"loss": 1.6134,
"step": 19400
},
{
"epoch": 52.28,
"learning_rate": 5.045892351274788e-05,
"loss": 1.5847,
"step": 19500
},
{
"epoch": 52.28,
"eval_cer": 0.33600036368595715,
"eval_loss": 0.8398524522781372,
"eval_runtime": 116.7512,
"eval_samples_per_second": 19.717,
"eval_steps_per_second": 2.467,
"eval_wer": 1.3308791684711996,
"step": 19500
},
{
"epoch": 52.55,
"learning_rate": 5.01756373937677e-05,
"loss": 1.5839,
"step": 19600
},
{
"epoch": 52.81,
"learning_rate": 4.9892351274787533e-05,
"loss": 1.5887,
"step": 19700
},
{
"epoch": 53.08,
"learning_rate": 4.9609065155807364e-05,
"loss": 1.6578,
"step": 19800
},
{
"epoch": 53.35,
"learning_rate": 4.9325779036827194e-05,
"loss": 1.5896,
"step": 19900
},
{
"epoch": 53.62,
"learning_rate": 4.9042492917847024e-05,
"loss": 1.5971,
"step": 20000
},
{
"epoch": 53.62,
"eval_cer": 0.3334545619857253,
"eval_loss": 0.844145655632019,
"eval_runtime": 117.5266,
"eval_samples_per_second": 19.587,
"eval_steps_per_second": 2.451,
"eval_wer": 1.3152880034647034,
"step": 20000
},
{
"epoch": 53.89,
"learning_rate": 4.8759206798866854e-05,
"loss": 1.5645,
"step": 20100
},
{
"epoch": 54.16,
"learning_rate": 4.847592067988669e-05,
"loss": 1.481,
"step": 20200
},
{
"epoch": 54.42,
"learning_rate": 4.819263456090652e-05,
"loss": 1.5474,
"step": 20300
},
{
"epoch": 54.69,
"learning_rate": 4.790934844192635e-05,
"loss": 1.576,
"step": 20400
},
{
"epoch": 54.96,
"learning_rate": 4.762606232294618e-05,
"loss": 1.602,
"step": 20500
},
{
"epoch": 54.96,
"eval_cer": 0.34331954357412375,
"eval_loss": 0.8589980602264404,
"eval_runtime": 117.5211,
"eval_samples_per_second": 19.588,
"eval_steps_per_second": 2.451,
"eval_wer": 1.2932005197055003,
"step": 20500
},
{
"epoch": 55.23,
"learning_rate": 4.734277620396601e-05,
"loss": 1.6106,
"step": 20600
},
{
"epoch": 55.5,
"learning_rate": 4.7059490084985836e-05,
"loss": 1.551,
"step": 20700
},
{
"epoch": 55.76,
"learning_rate": 4.6776203966005666e-05,
"loss": 1.5118,
"step": 20800
},
{
"epoch": 56.03,
"learning_rate": 4.64957507082153e-05,
"loss": 1.5028,
"step": 20900
},
{
"epoch": 56.3,
"learning_rate": 4.621246458923513e-05,
"loss": 1.5063,
"step": 21000
},
{
"epoch": 56.3,
"eval_cer": 0.28749374914761106,
"eval_loss": 0.8333584070205688,
"eval_runtime": 116.6165,
"eval_samples_per_second": 19.74,
"eval_steps_per_second": 2.47,
"eval_wer": 1.1312256388046773,
"step": 21000
},
{
"epoch": 56.57,
"learning_rate": 4.592917847025496e-05,
"loss": 1.5195,
"step": 21100
},
{
"epoch": 56.84,
"learning_rate": 4.564589235127479e-05,
"loss": 1.549,
"step": 21200
},
{
"epoch": 57.1,
"learning_rate": 4.536260623229462e-05,
"loss": 1.5636,
"step": 21300
},
{
"epoch": 57.37,
"learning_rate": 4.507932011331445e-05,
"loss": 1.482,
"step": 21400
},
{
"epoch": 57.64,
"learning_rate": 4.479603399433428e-05,
"loss": 1.4631,
"step": 21500
},
{
"epoch": 57.64,
"eval_cer": 0.2999045324362413,
"eval_loss": 0.8474038243293762,
"eval_runtime": 118.5932,
"eval_samples_per_second": 19.411,
"eval_steps_per_second": 2.428,
"eval_wer": 1.169770463404071,
"step": 21500
},
{
"epoch": 57.91,
"learning_rate": 4.451274787535411e-05,
"loss": 1.4869,
"step": 21600
},
{
"epoch": 58.18,
"learning_rate": 4.422946175637394e-05,
"loss": 1.4692,
"step": 21700
},
{
"epoch": 58.44,
"learning_rate": 4.394617563739377e-05,
"loss": 1.4673,
"step": 21800
},
{
"epoch": 58.71,
"learning_rate": 4.36628895184136e-05,
"loss": 1.501,
"step": 21900
},
{
"epoch": 58.98,
"learning_rate": 4.3379603399433425e-05,
"loss": 1.4997,
"step": 22000
},
{
"epoch": 58.98,
"eval_cer": 0.38541619311724323,
"eval_loss": 0.8637779355049133,
"eval_runtime": 116.453,
"eval_samples_per_second": 19.768,
"eval_steps_per_second": 2.473,
"eval_wer": 1.4278908618449546,
"step": 22000
},
{
"epoch": 59.25,
"learning_rate": 4.3096317280453255e-05,
"loss": 1.4404,
"step": 22100
},
{
"epoch": 59.52,
"learning_rate": 4.2815864022662894e-05,
"loss": 1.4639,
"step": 22200
},
{
"epoch": 59.78,
"learning_rate": 4.2532577903682725e-05,
"loss": 1.4724,
"step": 22300
},
{
"epoch": 60.05,
"learning_rate": 4.224929178470255e-05,
"loss": 1.4146,
"step": 22400
},
{
"epoch": 60.32,
"learning_rate": 4.196600566572238e-05,
"loss": 1.4301,
"step": 22500
},
{
"epoch": 60.32,
"eval_cer": 0.32995408464790654,
"eval_loss": 0.8549993634223938,
"eval_runtime": 118.194,
"eval_samples_per_second": 19.476,
"eval_steps_per_second": 2.437,
"eval_wer": 1.27371156344738,
"step": 22500
},
{
"epoch": 60.59,
"learning_rate": 4.168271954674221e-05,
"loss": 1.4288,
"step": 22600
},
{
"epoch": 60.86,
"learning_rate": 4.139943342776204e-05,
"loss": 1.4183,
"step": 22700
},
{
"epoch": 61.13,
"learning_rate": 4.111614730878187e-05,
"loss": 1.3995,
"step": 22800
},
{
"epoch": 61.39,
"learning_rate": 4.08328611898017e-05,
"loss": 1.3967,
"step": 22900
},
{
"epoch": 61.66,
"learning_rate": 4.054957507082153e-05,
"loss": 1.3798,
"step": 23000
},
{
"epoch": 61.66,
"eval_cer": 0.2934491066963677,
"eval_loss": 0.8265963792800903,
"eval_runtime": 118.5302,
"eval_samples_per_second": 19.421,
"eval_steps_per_second": 2.43,
"eval_wer": 1.1801645734084019,
"step": 23000
},
{
"epoch": 61.93,
"learning_rate": 4.026628895184136e-05,
"loss": 1.3781,
"step": 23100
},
{
"epoch": 62.2,
"learning_rate": 3.99830028328612e-05,
"loss": 1.412,
"step": 23200
},
{
"epoch": 62.47,
"learning_rate": 3.969971671388103e-05,
"loss": 1.3643,
"step": 23300
},
{
"epoch": 62.73,
"learning_rate": 3.941643059490085e-05,
"loss": 1.3848,
"step": 23400
},
{
"epoch": 63.0,
"learning_rate": 3.913314447592068e-05,
"loss": 1.3454,
"step": 23500
},
{
"epoch": 63.0,
"eval_cer": 0.3711415192980861,
"eval_loss": 0.8234531879425049,
"eval_runtime": 118.9878,
"eval_samples_per_second": 19.347,
"eval_steps_per_second": 2.42,
"eval_wer": 1.3815504547423128,
"step": 23500
},
{
"epoch": 63.27,
"learning_rate": 3.884985835694051e-05,
"loss": 1.3549,
"step": 23600
},
{
"epoch": 63.54,
"learning_rate": 3.856657223796034e-05,
"loss": 1.3746,
"step": 23700
},
{
"epoch": 63.81,
"learning_rate": 3.828328611898017e-05,
"loss": 1.3619,
"step": 23800
},
{
"epoch": 64.07,
"learning_rate": 3.8e-05,
"loss": 1.4683,
"step": 23900
},
{
"epoch": 64.34,
"learning_rate": 3.771671388101983e-05,
"loss": 1.3678,
"step": 24000
},
{
"epoch": 64.34,
"eval_cer": 0.5034777469654953,
"eval_loss": 0.8549569249153137,
"eval_runtime": 117.2623,
"eval_samples_per_second": 19.631,
"eval_steps_per_second": 2.456,
"eval_wer": 1.642702468601126,
"step": 24000
},
{
"epoch": 64.61,
"learning_rate": 3.743342776203966e-05,
"loss": 1.3534,
"step": 24100
},
{
"epoch": 64.88,
"learning_rate": 3.715014164305949e-05,
"loss": 1.341,
"step": 24200
},
{
"epoch": 65.15,
"learning_rate": 3.686685552407932e-05,
"loss": 1.2738,
"step": 24300
},
{
"epoch": 65.41,
"learning_rate": 3.658356940509915e-05,
"loss": 1.3237,
"step": 24400
},
{
"epoch": 65.68,
"learning_rate": 3.630028328611898e-05,
"loss": 1.3761,
"step": 24500
},
{
"epoch": 65.68,
"eval_cer": 0.490703277719689,
"eval_loss": 0.8510046601295471,
"eval_runtime": 120.5712,
"eval_samples_per_second": 19.092,
"eval_steps_per_second": 2.389,
"eval_wer": 1.6708531831961888,
"step": 24500
},
{
"epoch": 65.95,
"learning_rate": 3.6016997167138814e-05,
"loss": 1.3209,
"step": 24600
},
{
"epoch": 66.22,
"learning_rate": 3.5733711048158644e-05,
"loss": 1.4141,
"step": 24700
},
{
"epoch": 66.49,
"learning_rate": 3.5450424929178474e-05,
"loss": 1.3229,
"step": 24800
},
{
"epoch": 66.76,
"learning_rate": 3.5167138810198305e-05,
"loss": 1.3413,
"step": 24900
},
{
"epoch": 67.02,
"learning_rate": 3.4883852691218135e-05,
"loss": 1.2668,
"step": 25000
},
{
"epoch": 67.02,
"eval_cer": 0.45051597945174343,
"eval_loss": 0.8514528274536133,
"eval_runtime": 118.6832,
"eval_samples_per_second": 19.396,
"eval_steps_per_second": 2.427,
"eval_wer": 1.5842355998267648,
"step": 25000
},
{
"epoch": 67.29,
"learning_rate": 3.4600566572237965e-05,
"loss": 1.3151,
"step": 25100
},
{
"epoch": 67.56,
"learning_rate": 3.4317280453257796e-05,
"loss": 1.3491,
"step": 25200
},
{
"epoch": 67.83,
"learning_rate": 3.4033994334277626e-05,
"loss": 1.3392,
"step": 25300
},
{
"epoch": 68.1,
"learning_rate": 3.375070821529745e-05,
"loss": 1.3551,
"step": 25400
},
{
"epoch": 68.36,
"learning_rate": 3.346742209631728e-05,
"loss": 1.2835,
"step": 25500
},
{
"epoch": 68.36,
"eval_cer": 0.4221030140473701,
"eval_loss": 0.8283268213272095,
"eval_runtime": 118.4861,
"eval_samples_per_second": 19.428,
"eval_steps_per_second": 2.431,
"eval_wer": 1.5352966652230402,
"step": 25500
},
{
"epoch": 68.63,
"learning_rate": 3.318413597733711e-05,
"loss": 1.2847,
"step": 25600
},
{
"epoch": 68.9,
"learning_rate": 3.290084985835694e-05,
"loss": 1.3164,
"step": 25700
},
{
"epoch": 69.17,
"learning_rate": 3.261756373937677e-05,
"loss": 1.2624,
"step": 25800
},
{
"epoch": 69.44,
"learning_rate": 3.23342776203966e-05,
"loss": 1.3301,
"step": 25900
},
{
"epoch": 69.7,
"learning_rate": 3.205099150141643e-05,
"loss": 1.2961,
"step": 26000
},
{
"epoch": 69.7,
"eval_cer": 0.43692321680229124,
"eval_loss": 0.8339292407035828,
"eval_runtime": 119.6696,
"eval_samples_per_second": 19.236,
"eval_steps_per_second": 2.407,
"eval_wer": 1.574274577739281,
"step": 26000
},
{
"epoch": 69.97,
"learning_rate": 3.176770538243626e-05,
"loss": 1.2716,
"step": 26100
},
{
"epoch": 70.24,
"learning_rate": 3.148441926345609e-05,
"loss": 1.2832,
"step": 26200
},
{
"epoch": 70.51,
"learning_rate": 3.120113314447592e-05,
"loss": 1.2607,
"step": 26300
},
{
"epoch": 70.78,
"learning_rate": 3.091784702549575e-05,
"loss": 1.2774,
"step": 26400
},
{
"epoch": 71.05,
"learning_rate": 3.0637393767705384e-05,
"loss": 1.2656,
"step": 26500
},
{
"epoch": 71.05,
"eval_cer": 0.42169386734554715,
"eval_loss": 0.8330555558204651,
"eval_runtime": 120.4651,
"eval_samples_per_second": 19.109,
"eval_steps_per_second": 2.391,
"eval_wer": 1.5331312256388048,
"step": 26500
},
{
"epoch": 71.31,
"learning_rate": 3.0354107648725215e-05,
"loss": 1.2885,
"step": 26600
},
{
"epoch": 71.58,
"learning_rate": 3.007082152974504e-05,
"loss": 1.2552,
"step": 26700
},
{
"epoch": 71.85,
"learning_rate": 2.9787535410764872e-05,
"loss": 1.2682,
"step": 26800
},
{
"epoch": 72.12,
"learning_rate": 2.9504249291784702e-05,
"loss": 1.2665,
"step": 26900
},
{
"epoch": 72.39,
"learning_rate": 2.9220963172804532e-05,
"loss": 1.2556,
"step": 27000
},
{
"epoch": 72.39,
"eval_cer": 0.4109196708642088,
"eval_loss": 0.8242233991622925,
"eval_runtime": 118.9857,
"eval_samples_per_second": 19.347,
"eval_steps_per_second": 2.42,
"eval_wer": 1.4707665656128195,
"step": 27000
},
{
"epoch": 72.65,
"learning_rate": 2.8937677053824363e-05,
"loss": 1.2125,
"step": 27100
},
{
"epoch": 72.92,
"learning_rate": 2.8654390934844193e-05,
"loss": 1.2157,
"step": 27200
},
{
"epoch": 73.19,
"learning_rate": 2.8371104815864023e-05,
"loss": 1.2664,
"step": 27300
},
{
"epoch": 73.46,
"learning_rate": 2.8087818696883857e-05,
"loss": 1.2075,
"step": 27400
},
{
"epoch": 73.73,
"learning_rate": 2.7804532577903687e-05,
"loss": 1.2043,
"step": 27500
},
{
"epoch": 73.73,
"eval_cer": 0.40305496204027824,
"eval_loss": 0.8244912624359131,
"eval_runtime": 118.8221,
"eval_samples_per_second": 19.373,
"eval_steps_per_second": 2.424,
"eval_wer": 1.4469467301862278,
"step": 27500
},
{
"epoch": 73.99,
"learning_rate": 2.7521246458923517e-05,
"loss": 1.2218,
"step": 27600
},
{
"epoch": 74.26,
"learning_rate": 2.723796033994334e-05,
"loss": 1.2257,
"step": 27700
},
{
"epoch": 74.53,
"learning_rate": 2.695467422096317e-05,
"loss": 1.1943,
"step": 27800
},
{
"epoch": 74.8,
"learning_rate": 2.6671388101983e-05,
"loss": 1.2292,
"step": 27900
},
{
"epoch": 75.07,
"learning_rate": 2.638810198300283e-05,
"loss": 1.2722,
"step": 28000
},
{
"epoch": 75.07,
"eval_cer": 0.4095558485247988,
"eval_loss": 0.8202398419380188,
"eval_runtime": 118.3165,
"eval_samples_per_second": 19.456,
"eval_steps_per_second": 2.434,
"eval_wer": 1.4924209614551753,
"step": 28000
},
{
"epoch": 75.33,
"learning_rate": 2.6104815864022665e-05,
"loss": 1.2,
"step": 28100
},
{
"epoch": 75.6,
"learning_rate": 2.5821529745042495e-05,
"loss": 1.1984,
"step": 28200
},
{
"epoch": 75.87,
"learning_rate": 2.5538243626062326e-05,
"loss": 1.204,
"step": 28300
},
{
"epoch": 76.14,
"learning_rate": 2.5254957507082156e-05,
"loss": 1.103,
"step": 28400
},
{
"epoch": 76.41,
"learning_rate": 2.4971671388101983e-05,
"loss": 1.202,
"step": 28500
},
{
"epoch": 76.41,
"eval_cer": 0.37186889121243805,
"eval_loss": 0.8290452361106873,
"eval_runtime": 117.9552,
"eval_samples_per_second": 19.516,
"eval_steps_per_second": 2.442,
"eval_wer": 1.3806842789086184,
"step": 28500
},
{
"epoch": 76.67,
"learning_rate": 2.4691218130311615e-05,
"loss": 1.1915,
"step": 28600
},
{
"epoch": 76.94,
"learning_rate": 2.4407932011331446e-05,
"loss": 1.1898,
"step": 28700
},
{
"epoch": 77.21,
"learning_rate": 2.4124645892351276e-05,
"loss": 1.2433,
"step": 28800
},
{
"epoch": 77.48,
"learning_rate": 2.3841359773371106e-05,
"loss": 1.1887,
"step": 28900
},
{
"epoch": 77.75,
"learning_rate": 2.3558073654390936e-05,
"loss": 1.1679,
"step": 29000
},
{
"epoch": 77.75,
"eval_cer": 0.3748693003591399,
"eval_loss": 0.8194963335990906,
"eval_runtime": 117.7002,
"eval_samples_per_second": 19.558,
"eval_steps_per_second": 2.447,
"eval_wer": 1.4097011693373755,
"step": 29000
},
{
"epoch": 78.02,
"learning_rate": 2.3274787535410767e-05,
"loss": 1.1151,
"step": 29100
},
{
"epoch": 78.28,
"learning_rate": 2.2991501416430597e-05,
"loss": 1.1638,
"step": 29200
},
{
"epoch": 78.55,
"learning_rate": 2.2708215297450424e-05,
"loss": 1.1516,
"step": 29300
},
{
"epoch": 78.82,
"learning_rate": 2.2424929178470254e-05,
"loss": 1.1652,
"step": 29400
},
{
"epoch": 79.09,
"learning_rate": 2.2141643059490084e-05,
"loss": 1.1967,
"step": 29500
},
{
"epoch": 79.09,
"eval_cer": 0.30767831977087784,
"eval_loss": 0.8058642148971558,
"eval_runtime": 118.6259,
"eval_samples_per_second": 19.406,
"eval_steps_per_second": 2.428,
"eval_wer": 1.2074491121697704,
"step": 29500
},
{
"epoch": 79.36,
"learning_rate": 2.1858356940509918e-05,
"loss": 1.1425,
"step": 29600
},
{
"epoch": 79.62,
"learning_rate": 2.1575070821529748e-05,
"loss": 1.1489,
"step": 29700
},
{
"epoch": 79.89,
"learning_rate": 2.1291784702549575e-05,
"loss": 1.136,
"step": 29800
},
{
"epoch": 80.16,
"learning_rate": 2.1008498583569405e-05,
"loss": 1.0913,
"step": 29900
},
{
"epoch": 80.43,
"learning_rate": 2.0725212464589236e-05,
"loss": 1.1241,
"step": 30000
},
{
"epoch": 80.43,
"eval_cer": 0.3270445969904987,
"eval_loss": 0.8137025833129883,
"eval_runtime": 117.4298,
"eval_samples_per_second": 19.603,
"eval_steps_per_second": 2.453,
"eval_wer": 1.2451277609354698,
"step": 30000
},
{
"epoch": 80.7,
"learning_rate": 2.0441926345609066e-05,
"loss": 1.1314,
"step": 30100
},
{
"epoch": 80.96,
"learning_rate": 2.0158640226628896e-05,
"loss": 1.1315,
"step": 30200
},
{
"epoch": 81.23,
"learning_rate": 1.9875354107648726e-05,
"loss": 1.1535,
"step": 30300
},
{
"epoch": 81.5,
"learning_rate": 1.9592067988668557e-05,
"loss": 1.1567,
"step": 30400
},
{
"epoch": 81.77,
"learning_rate": 1.931161473087819e-05,
"loss": 1.1414,
"step": 30500
},
{
"epoch": 81.77,
"eval_cer": 0.3120880120016366,
"eval_loss": 0.8117419481277466,
"eval_runtime": 116.2752,
"eval_samples_per_second": 19.798,
"eval_steps_per_second": 2.477,
"eval_wer": 1.2031182330012993,
"step": 30500
},
{
"epoch": 82.04,
"learning_rate": 1.902832861189802e-05,
"loss": 1.0878,
"step": 30600
},
{
"epoch": 82.31,
"learning_rate": 1.8745042492917846e-05,
"loss": 1.0806,
"step": 30700
},
{
"epoch": 82.57,
"learning_rate": 1.8461756373937677e-05,
"loss": 1.0914,
"step": 30800
},
{
"epoch": 82.84,
"learning_rate": 1.8178470254957507e-05,
"loss": 1.1274,
"step": 30900
},
{
"epoch": 83.11,
"learning_rate": 1.7895184135977337e-05,
"loss": 1.132,
"step": 31000
},
{
"epoch": 83.11,
"eval_cer": 0.390053189071237,
"eval_loss": 0.823433518409729,
"eval_runtime": 118.2321,
"eval_samples_per_second": 19.47,
"eval_steps_per_second": 2.436,
"eval_wer": 1.4265915980944133,
"step": 31000
},
{
"epoch": 83.38,
"learning_rate": 1.761189801699717e-05,
"loss": 1.0891,
"step": 31100
},
{
"epoch": 83.65,
"learning_rate": 1.7328611898016998e-05,
"loss": 1.1041,
"step": 31200
},
{
"epoch": 83.91,
"learning_rate": 1.7045325779036828e-05,
"loss": 1.0895,
"step": 31300
},
{
"epoch": 84.18,
"learning_rate": 1.6762039660056658e-05,
"loss": 1.0966,
"step": 31400
},
{
"epoch": 84.45,
"learning_rate": 1.647875354107649e-05,
"loss": 1.0982,
"step": 31500
},
{
"epoch": 84.45,
"eval_cer": 0.3606855480292767,
"eval_loss": 0.8063952326774597,
"eval_runtime": 119.0446,
"eval_samples_per_second": 19.337,
"eval_steps_per_second": 2.419,
"eval_wer": 1.3711563447379818,
"step": 31500
},
{
"epoch": 84.72,
"learning_rate": 1.619546742209632e-05,
"loss": 1.0969,
"step": 31600
},
{
"epoch": 84.99,
"learning_rate": 1.5912181303116146e-05,
"loss": 1.0552,
"step": 31700
},
{
"epoch": 85.25,
"learning_rate": 1.562889518413598e-05,
"loss": 1.079,
"step": 31800
},
{
"epoch": 85.52,
"learning_rate": 1.534560906515581e-05,
"loss": 1.0959,
"step": 31900
},
{
"epoch": 85.79,
"learning_rate": 1.5062322946175638e-05,
"loss": 1.0797,
"step": 32000
},
{
"epoch": 85.79,
"eval_cer": 0.356184934309224,
"eval_loss": 0.8166823983192444,
"eval_runtime": 117.1158,
"eval_samples_per_second": 19.656,
"eval_steps_per_second": 2.459,
"eval_wer": 1.335643135556518,
"step": 32000
},
{
"epoch": 86.06,
"learning_rate": 1.477903682719547e-05,
"loss": 1.1358,
"step": 32100
},
{
"epoch": 86.33,
"learning_rate": 1.4495750708215297e-05,
"loss": 1.0718,
"step": 32200
},
{
"epoch": 86.59,
"learning_rate": 1.4212464589235127e-05,
"loss": 1.0725,
"step": 32300
},
{
"epoch": 86.86,
"learning_rate": 1.3929178470254959e-05,
"loss": 1.0705,
"step": 32400
},
{
"epoch": 87.13,
"learning_rate": 1.364589235127479e-05,
"loss": 1.0119,
"step": 32500
},
{
"epoch": 87.13,
"eval_cer": 0.3267718325226167,
"eval_loss": 0.8214733600616455,
"eval_runtime": 118.8447,
"eval_samples_per_second": 19.37,
"eval_steps_per_second": 2.423,
"eval_wer": 1.2754439151147683,
"step": 32500
},
{
"epoch": 87.4,
"learning_rate": 1.336260623229462e-05,
"loss": 1.0571,
"step": 32600
},
{
"epoch": 87.67,
"learning_rate": 1.3079320113314446e-05,
"loss": 1.0636,
"step": 32700
},
{
"epoch": 87.93,
"learning_rate": 1.2796033994334278e-05,
"loss": 1.0892,
"step": 32800
},
{
"epoch": 88.2,
"learning_rate": 1.2512747875354109e-05,
"loss": 1.1356,
"step": 32900
},
{
"epoch": 88.47,
"learning_rate": 1.2229461756373939e-05,
"loss": 1.0216,
"step": 33000
},
{
"epoch": 88.47,
"eval_cer": 0.3183615947629222,
"eval_loss": 0.816307008266449,
"eval_runtime": 118.8553,
"eval_samples_per_second": 19.368,
"eval_steps_per_second": 2.423,
"eval_wer": 1.2511909917713295,
"step": 33000
},
{
"epoch": 88.74,
"learning_rate": 1.1946175637393768e-05,
"loss": 1.0226,
"step": 33100
},
{
"epoch": 89.01,
"learning_rate": 1.1662889518413598e-05,
"loss": 1.0002,
"step": 33200
},
{
"epoch": 89.28,
"learning_rate": 1.1379603399433428e-05,
"loss": 1.0296,
"step": 33300
},
{
"epoch": 89.54,
"learning_rate": 1.1096317280453258e-05,
"loss": 1.0635,
"step": 33400
},
{
"epoch": 89.81,
"learning_rate": 1.0813031161473089e-05,
"loss": 1.0375,
"step": 33500
},
{
"epoch": 89.81,
"eval_cer": 0.3290448697549666,
"eval_loss": 0.8136931657791138,
"eval_runtime": 119.3202,
"eval_samples_per_second": 19.293,
"eval_steps_per_second": 2.414,
"eval_wer": 1.2685145084452143,
"step": 33500
},
{
"epoch": 90.08,
"learning_rate": 1.0529745042492919e-05,
"loss": 1.096,
"step": 33600
},
{
"epoch": 90.35,
"learning_rate": 1.0246458923512749e-05,
"loss": 1.0465,
"step": 33700
},
{
"epoch": 90.62,
"learning_rate": 9.963172804532578e-06,
"loss": 1.0637,
"step": 33800
},
{
"epoch": 90.88,
"learning_rate": 9.679886685552408e-06,
"loss": 1.0514,
"step": 33900
},
{
"epoch": 91.15,
"learning_rate": 9.396600566572238e-06,
"loss": 0.9794,
"step": 34000
},
{
"epoch": 91.15,
"eval_cer": 0.32549893167250077,
"eval_loss": 0.8219542503356934,
"eval_runtime": 117.5416,
"eval_samples_per_second": 19.585,
"eval_steps_per_second": 2.45,
"eval_wer": 1.2724122996968386,
"step": 34000
},
{
"epoch": 91.42,
"learning_rate": 9.113314447592068e-06,
"loss": 1.0268,
"step": 34100
},
{
"epoch": 91.69,
"learning_rate": 8.830028328611899e-06,
"loss": 1.0211,
"step": 34200
},
{
"epoch": 91.96,
"learning_rate": 8.546742209631727e-06,
"loss": 1.0557,
"step": 34300
},
{
"epoch": 92.22,
"learning_rate": 8.26345609065156e-06,
"loss": 1.0814,
"step": 34400
},
{
"epoch": 92.49,
"learning_rate": 7.98016997167139e-06,
"loss": 1.0207,
"step": 34500
},
{
"epoch": 92.49,
"eval_cer": 0.33609128517525116,
"eval_loss": 0.8165063261985779,
"eval_runtime": 117.4538,
"eval_samples_per_second": 19.599,
"eval_steps_per_second": 2.452,
"eval_wer": 1.2906019922044174,
"step": 34500
},
{
"epoch": 92.76,
"learning_rate": 7.696883852691218e-06,
"loss": 1.0183,
"step": 34600
},
{
"epoch": 93.03,
"learning_rate": 7.413597733711048e-06,
"loss": 0.9889,
"step": 34700
},
{
"epoch": 93.3,
"learning_rate": 7.130311614730878e-06,
"loss": 1.0047,
"step": 34800
},
{
"epoch": 93.56,
"learning_rate": 6.847025495750709e-06,
"loss": 1.0318,
"step": 34900
},
{
"epoch": 93.83,
"learning_rate": 6.563739376770539e-06,
"loss": 1.0169,
"step": 35000
},
{
"epoch": 93.83,
"eval_cer": 0.3305450743283175,
"eval_loss": 0.8153378367424011,
"eval_runtime": 118.1425,
"eval_samples_per_second": 19.485,
"eval_steps_per_second": 2.438,
"eval_wer": 1.281940233867475,
"step": 35000
},
{
"epoch": 94.1,
"learning_rate": 6.2804532577903686e-06,
"loss": 1.0584,
"step": 35100
},
{
"epoch": 94.37,
"learning_rate": 5.997167138810199e-06,
"loss": 1.0074,
"step": 35200
},
{
"epoch": 94.64,
"learning_rate": 5.71671388101983e-06,
"loss": 1.0278,
"step": 35300
},
{
"epoch": 94.9,
"learning_rate": 5.433427762039661e-06,
"loss": 1.0251,
"step": 35400
},
{
"epoch": 95.17,
"learning_rate": 5.15014164305949e-06,
"loss": 1.0127,
"step": 35500
},
{
"epoch": 95.17,
"eval_cer": 0.3251807064599718,
"eval_loss": 0.8187472224235535,
"eval_runtime": 117.413,
"eval_samples_per_second": 19.606,
"eval_steps_per_second": 2.453,
"eval_wer": 1.2832394976180164,
"step": 35500
},
{
"epoch": 95.44,
"learning_rate": 4.86685552407932e-06,
"loss": 1.0153,
"step": 35600
},
{
"epoch": 95.71,
"learning_rate": 4.58356940509915e-06,
"loss": 1.0098,
"step": 35700
},
{
"epoch": 95.98,
"learning_rate": 4.300283286118981e-06,
"loss": 1.0034,
"step": 35800
},
{
"epoch": 96.25,
"learning_rate": 4.01699716713881e-06,
"loss": 1.017,
"step": 35900
},
{
"epoch": 96.51,
"learning_rate": 3.7337110481586406e-06,
"loss": 0.9978,
"step": 36000
},
{
"epoch": 96.51,
"eval_cer": 0.3209528572078011,
"eval_loss": 0.811066746711731,
"eval_runtime": 117.24,
"eval_samples_per_second": 19.635,
"eval_steps_per_second": 2.456,
"eval_wer": 1.2611520138588133,
"step": 36000
},
{
"epoch": 96.78,
"learning_rate": 3.4504249291784704e-06,
"loss": 1.0111,
"step": 36100
},
{
"epoch": 97.05,
"learning_rate": 3.1671388101983003e-06,
"loss": 1.0185,
"step": 36200
},
{
"epoch": 97.32,
"learning_rate": 2.8838526912181305e-06,
"loss": 0.9678,
"step": 36300
},
{
"epoch": 97.59,
"learning_rate": 2.6005665722379608e-06,
"loss": 1.0271,
"step": 36400
},
{
"epoch": 97.85,
"learning_rate": 2.3172804532577906e-06,
"loss": 0.9923,
"step": 36500
},
{
"epoch": 97.85,
"eval_cer": 0.3122243942355776,
"eval_loss": 0.8076378703117371,
"eval_runtime": 116.5522,
"eval_samples_per_second": 19.751,
"eval_steps_per_second": 2.471,
"eval_wer": 1.227804244261585,
"step": 36500
},
{
"epoch": 98.12,
"learning_rate": 2.0339943342776205e-06,
"loss": 0.9955,
"step": 36600
},
{
"epoch": 98.39,
"learning_rate": 1.7507082152974505e-06,
"loss": 1.0009,
"step": 36700
},
{
"epoch": 98.66,
"learning_rate": 1.4674220963172806e-06,
"loss": 0.9855,
"step": 36800
},
{
"epoch": 98.93,
"learning_rate": 1.1869688385269122e-06,
"loss": 1.0056,
"step": 36900
},
{
"epoch": 99.2,
"learning_rate": 9.036827195467423e-07,
"loss": 1.0451,
"step": 37000
},
{
"epoch": 99.2,
"eval_cer": 0.3155884893394554,
"eval_loss": 0.8086187243461609,
"eval_runtime": 119.7199,
"eval_samples_per_second": 19.228,
"eval_steps_per_second": 2.406,
"eval_wer": 1.2451277609354698,
"step": 37000
},
{
"epoch": 99.46,
"learning_rate": 6.203966005665723e-07,
"loss": 1.0189,
"step": 37100
},
{
"epoch": 99.73,
"learning_rate": 3.371104815864023e-07,
"loss": 0.9878,
"step": 37200
},
{
"epoch": 100.0,
"learning_rate": 5.3824362606232296e-08,
"loss": 0.96,
"step": 37300
},
{
"epoch": 100.0,
"step": 37300,
"total_flos": 1.39626030021533e+20,
"train_loss": 3.7852419735087786,
"train_runtime": 72640.1075,
"train_samples_per_second": 16.45,
"train_steps_per_second": 0.513
}
],
"max_steps": 37300,
"num_train_epochs": 100,
"total_flos": 1.39626030021533e+20,
"trial_name": null,
"trial_params": null
}