xls-r-300m-npsc-4 / trainer_state.json
Rolv-Arild's picture
End of training
a40988b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.999153259949196,
"global_step": 17700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 1.8749999999999998e-06,
"loss": 9.7932,
"step": 50
},
{
"epoch": 0.11,
"learning_rate": 3.7125e-06,
"loss": 9.6218,
"step": 100
},
{
"epoch": 0.17,
"learning_rate": 5.549999999999999e-06,
"loss": 7.6384,
"step": 150
},
{
"epoch": 0.23,
"learning_rate": 7.425e-06,
"loss": 5.5724,
"step": 200
},
{
"epoch": 0.28,
"learning_rate": 9.299999999999999e-06,
"loss": 4.4527,
"step": 250
},
{
"epoch": 0.28,
"eval_loss": 4.014413833618164,
"eval_runtime": 432.9366,
"eval_samples_per_second": 14.339,
"eval_steps_per_second": 0.896,
"eval_wer": 1.0,
"step": 250
},
{
"epoch": 0.34,
"learning_rate": 1.1174999999999999e-05,
"loss": 3.996,
"step": 300
},
{
"epoch": 0.4,
"learning_rate": 1.3049999999999999e-05,
"loss": 3.6961,
"step": 350
},
{
"epoch": 0.45,
"learning_rate": 1.4925e-05,
"loss": 3.4442,
"step": 400
},
{
"epoch": 0.51,
"learning_rate": 1.68e-05,
"loss": 3.3442,
"step": 450
},
{
"epoch": 0.56,
"learning_rate": 1.8675e-05,
"loss": 3.1828,
"step": 500
},
{
"epoch": 0.56,
"eval_loss": 3.136876106262207,
"eval_runtime": 432.521,
"eval_samples_per_second": 14.353,
"eval_steps_per_second": 0.897,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 0.62,
"learning_rate": 2.055e-05,
"loss": 3.1052,
"step": 550
},
{
"epoch": 0.68,
"learning_rate": 2.2424999999999996e-05,
"loss": 3.0545,
"step": 600
},
{
"epoch": 0.73,
"learning_rate": 2.4299999999999998e-05,
"loss": 3.0155,
"step": 650
},
{
"epoch": 0.79,
"learning_rate": 2.6174999999999996e-05,
"loss": 3.0148,
"step": 700
},
{
"epoch": 0.85,
"learning_rate": 2.8049999999999997e-05,
"loss": 2.9927,
"step": 750
},
{
"epoch": 0.85,
"eval_loss": 3.0182671546936035,
"eval_runtime": 435.1578,
"eval_samples_per_second": 14.266,
"eval_steps_per_second": 0.892,
"eval_wer": 1.0,
"step": 750
},
{
"epoch": 0.9,
"learning_rate": 2.9925e-05,
"loss": 2.9829,
"step": 800
},
{
"epoch": 0.96,
"learning_rate": 3.1799999999999994e-05,
"loss": 2.9876,
"step": 850
},
{
"epoch": 1.02,
"learning_rate": 3.3675e-05,
"loss": 3.0304,
"step": 900
},
{
"epoch": 1.07,
"learning_rate": 3.555e-05,
"loss": 2.9783,
"step": 950
},
{
"epoch": 1.13,
"learning_rate": 3.7424999999999995e-05,
"loss": 2.9591,
"step": 1000
},
{
"epoch": 1.13,
"eval_loss": 2.999102830886841,
"eval_runtime": 430.4872,
"eval_samples_per_second": 14.421,
"eval_steps_per_second": 0.901,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 1.19,
"learning_rate": 3.93e-05,
"loss": 2.9559,
"step": 1050
},
{
"epoch": 1.24,
"learning_rate": 4.1175e-05,
"loss": 2.9388,
"step": 1100
},
{
"epoch": 1.3,
"learning_rate": 4.3049999999999996e-05,
"loss": 2.9321,
"step": 1150
},
{
"epoch": 1.36,
"learning_rate": 4.4924999999999994e-05,
"loss": 2.9205,
"step": 1200
},
{
"epoch": 1.41,
"learning_rate": 4.68e-05,
"loss": 2.8989,
"step": 1250
},
{
"epoch": 1.41,
"eval_loss": 2.9000213146209717,
"eval_runtime": 431.5059,
"eval_samples_per_second": 14.387,
"eval_steps_per_second": 0.899,
"eval_wer": 0.999990658308967,
"step": 1250
},
{
"epoch": 1.47,
"learning_rate": 4.8675e-05,
"loss": 2.8682,
"step": 1300
},
{
"epoch": 1.52,
"learning_rate": 5.055e-05,
"loss": 2.8476,
"step": 1350
},
{
"epoch": 1.58,
"learning_rate": 5.2424999999999994e-05,
"loss": 2.7956,
"step": 1400
},
{
"epoch": 1.64,
"learning_rate": 5.429999999999999e-05,
"loss": 2.6754,
"step": 1450
},
{
"epoch": 1.69,
"learning_rate": 5.6175e-05,
"loss": 2.4286,
"step": 1500
},
{
"epoch": 1.69,
"eval_loss": 1.7688498497009277,
"eval_runtime": 430.3663,
"eval_samples_per_second": 14.425,
"eval_steps_per_second": 0.902,
"eval_wer": 0.9550384410586005,
"step": 1500
},
{
"epoch": 1.75,
"learning_rate": 5.8049999999999995e-05,
"loss": 2.218,
"step": 1550
},
{
"epoch": 1.81,
"learning_rate": 5.9925e-05,
"loss": 2.0095,
"step": 1600
},
{
"epoch": 1.86,
"learning_rate": 6.18e-05,
"loss": 1.8416,
"step": 1650
},
{
"epoch": 1.92,
"learning_rate": 6.367499999999999e-05,
"loss": 1.7642,
"step": 1700
},
{
"epoch": 1.98,
"learning_rate": 6.555e-05,
"loss": 1.6765,
"step": 1750
},
{
"epoch": 1.98,
"eval_loss": 0.6841917037963867,
"eval_runtime": 433.019,
"eval_samples_per_second": 14.337,
"eval_steps_per_second": 0.896,
"eval_wer": 0.48551570805347183,
"step": 1750
},
{
"epoch": 2.03,
"learning_rate": 6.7425e-05,
"loss": 1.5994,
"step": 1800
},
{
"epoch": 2.09,
"learning_rate": 6.93e-05,
"loss": 1.5522,
"step": 1850
},
{
"epoch": 2.15,
"learning_rate": 7.1175e-05,
"loss": 1.52,
"step": 1900
},
{
"epoch": 2.2,
"learning_rate": 7.304999999999999e-05,
"loss": 1.5086,
"step": 1950
},
{
"epoch": 2.26,
"learning_rate": 7.492499999999999e-05,
"loss": 1.4521,
"step": 2000
},
{
"epoch": 2.26,
"eval_loss": 0.5096011757850647,
"eval_runtime": 431.8266,
"eval_samples_per_second": 14.376,
"eval_steps_per_second": 0.899,
"eval_wer": 0.3735835660971349,
"step": 2000
},
{
"epoch": 2.32,
"learning_rate": 7.477070063694266e-05,
"loss": 1.4457,
"step": 2050
},
{
"epoch": 2.37,
"learning_rate": 7.453184713375795e-05,
"loss": 1.4276,
"step": 2100
},
{
"epoch": 2.43,
"learning_rate": 7.429299363057323e-05,
"loss": 1.4028,
"step": 2150
},
{
"epoch": 2.49,
"learning_rate": 7.405414012738853e-05,
"loss": 1.3887,
"step": 2200
},
{
"epoch": 2.54,
"learning_rate": 7.38152866242038e-05,
"loss": 1.3589,
"step": 2250
},
{
"epoch": 2.54,
"eval_loss": 0.44788965582847595,
"eval_runtime": 430.2855,
"eval_samples_per_second": 14.428,
"eval_steps_per_second": 0.902,
"eval_wer": 0.3335450783300793,
"step": 2250
},
{
"epoch": 2.6,
"learning_rate": 7.35764331210191e-05,
"loss": 1.3935,
"step": 2300
},
{
"epoch": 2.65,
"learning_rate": 7.333757961783438e-05,
"loss": 1.3425,
"step": 2350
},
{
"epoch": 2.71,
"learning_rate": 7.309872611464967e-05,
"loss": 1.3657,
"step": 2400
},
{
"epoch": 2.77,
"learning_rate": 7.285987261146495e-05,
"loss": 1.3645,
"step": 2450
},
{
"epoch": 2.82,
"learning_rate": 7.262101910828025e-05,
"loss": 1.3136,
"step": 2500
},
{
"epoch": 2.82,
"eval_loss": 0.40564054250717163,
"eval_runtime": 428.4501,
"eval_samples_per_second": 14.489,
"eval_steps_per_second": 0.906,
"eval_wer": 0.3123020729212402,
"step": 2500
},
{
"epoch": 2.88,
"learning_rate": 7.238216560509553e-05,
"loss": 1.3415,
"step": 2550
},
{
"epoch": 2.94,
"learning_rate": 7.214331210191082e-05,
"loss": 1.3345,
"step": 2600
},
{
"epoch": 2.99,
"learning_rate": 7.19044585987261e-05,
"loss": 1.3283,
"step": 2650
},
{
"epoch": 3.05,
"learning_rate": 7.16656050955414e-05,
"loss": 1.2788,
"step": 2700
},
{
"epoch": 3.11,
"learning_rate": 7.142675159235667e-05,
"loss": 1.2856,
"step": 2750
},
{
"epoch": 3.11,
"eval_loss": 0.38699424266815186,
"eval_runtime": 430.1514,
"eval_samples_per_second": 14.432,
"eval_steps_per_second": 0.902,
"eval_wer": 0.29870991246835504,
"step": 2750
},
{
"epoch": 3.16,
"learning_rate": 7.118789808917197e-05,
"loss": 1.2817,
"step": 2800
},
{
"epoch": 3.22,
"learning_rate": 7.094904458598725e-05,
"loss": 1.2502,
"step": 2850
},
{
"epoch": 3.28,
"learning_rate": 7.071019108280254e-05,
"loss": 1.2623,
"step": 2900
},
{
"epoch": 3.33,
"learning_rate": 7.047133757961782e-05,
"loss": 1.2302,
"step": 2950
},
{
"epoch": 3.39,
"learning_rate": 7.023248407643311e-05,
"loss": 1.2283,
"step": 3000
},
{
"epoch": 3.39,
"eval_loss": 0.3645668029785156,
"eval_runtime": 430.0013,
"eval_samples_per_second": 14.437,
"eval_steps_per_second": 0.902,
"eval_wer": 0.2828290377124067,
"step": 3000
},
{
"epoch": 3.45,
"learning_rate": 6.99936305732484e-05,
"loss": 1.1993,
"step": 3050
},
{
"epoch": 3.5,
"learning_rate": 6.975477707006369e-05,
"loss": 1.2627,
"step": 3100
},
{
"epoch": 3.56,
"learning_rate": 6.951592356687897e-05,
"loss": 1.1969,
"step": 3150
},
{
"epoch": 3.62,
"learning_rate": 6.927707006369426e-05,
"loss": 1.2054,
"step": 3200
},
{
"epoch": 3.67,
"learning_rate": 6.903821656050954e-05,
"loss": 1.2053,
"step": 3250
},
{
"epoch": 3.67,
"eval_loss": 0.3499177098274231,
"eval_runtime": 429.329,
"eval_samples_per_second": 14.46,
"eval_steps_per_second": 0.904,
"eval_wer": 0.2747578166599718,
"step": 3250
},
{
"epoch": 3.73,
"learning_rate": 6.879936305732483e-05,
"loss": 1.2144,
"step": 3300
},
{
"epoch": 3.78,
"learning_rate": 6.856050955414011e-05,
"loss": 1.1882,
"step": 3350
},
{
"epoch": 3.84,
"learning_rate": 6.832165605095541e-05,
"loss": 1.1901,
"step": 3400
},
{
"epoch": 3.9,
"learning_rate": 6.808280254777069e-05,
"loss": 1.2064,
"step": 3450
},
{
"epoch": 3.95,
"learning_rate": 6.784394904458598e-05,
"loss": 1.2087,
"step": 3500
},
{
"epoch": 3.95,
"eval_loss": 0.3345482349395752,
"eval_runtime": 430.4222,
"eval_samples_per_second": 14.423,
"eval_steps_per_second": 0.901,
"eval_wer": 0.2602781955589601,
"step": 3500
},
{
"epoch": 4.01,
"learning_rate": 6.760509554140126e-05,
"loss": 1.1945,
"step": 3550
},
{
"epoch": 4.07,
"learning_rate": 6.736624203821655e-05,
"loss": 1.1674,
"step": 3600
},
{
"epoch": 4.12,
"learning_rate": 6.712738853503183e-05,
"loss": 1.2197,
"step": 3650
},
{
"epoch": 4.18,
"learning_rate": 6.688853503184713e-05,
"loss": 1.1832,
"step": 3700
},
{
"epoch": 4.24,
"learning_rate": 6.664968152866241e-05,
"loss": 1.2002,
"step": 3750
},
{
"epoch": 4.24,
"eval_loss": 0.3320307731628418,
"eval_runtime": 429.9654,
"eval_samples_per_second": 14.438,
"eval_steps_per_second": 0.902,
"eval_wer": 0.25228170803478844,
"step": 3750
},
{
"epoch": 4.29,
"learning_rate": 6.64108280254777e-05,
"loss": 1.1655,
"step": 3800
},
{
"epoch": 4.35,
"learning_rate": 6.617197452229298e-05,
"loss": 1.1387,
"step": 3850
},
{
"epoch": 4.41,
"learning_rate": 6.593312101910828e-05,
"loss": 1.1344,
"step": 3900
},
{
"epoch": 4.46,
"learning_rate": 6.569426751592356e-05,
"loss": 1.169,
"step": 3950
},
{
"epoch": 4.52,
"learning_rate": 6.545541401273885e-05,
"loss": 1.1383,
"step": 4000
},
{
"epoch": 4.52,
"eval_loss": 0.31172633171081543,
"eval_runtime": 428.4618,
"eval_samples_per_second": 14.489,
"eval_steps_per_second": 0.906,
"eval_wer": 0.24393957794239912,
"step": 4000
},
{
"epoch": 4.58,
"learning_rate": 6.521656050955413e-05,
"loss": 1.1241,
"step": 4050
},
{
"epoch": 4.63,
"learning_rate": 6.497770700636942e-05,
"loss": 1.1505,
"step": 4100
},
{
"epoch": 4.69,
"learning_rate": 6.47388535031847e-05,
"loss": 1.1309,
"step": 4150
},
{
"epoch": 4.75,
"learning_rate": 6.45e-05,
"loss": 1.1368,
"step": 4200
},
{
"epoch": 4.8,
"learning_rate": 6.426114649681528e-05,
"loss": 1.1364,
"step": 4250
},
{
"epoch": 4.8,
"eval_loss": 0.3198467195034027,
"eval_runtime": 427.239,
"eval_samples_per_second": 14.531,
"eval_steps_per_second": 0.908,
"eval_wer": 0.2382878548674881,
"step": 4250
},
{
"epoch": 4.86,
"learning_rate": 6.402229299363057e-05,
"loss": 1.1185,
"step": 4300
},
{
"epoch": 4.91,
"learning_rate": 6.378343949044585e-05,
"loss": 1.1214,
"step": 4350
},
{
"epoch": 4.97,
"learning_rate": 6.354458598726114e-05,
"loss": 1.1188,
"step": 4400
},
{
"epoch": 5.03,
"learning_rate": 6.330573248407642e-05,
"loss": 1.1327,
"step": 4450
},
{
"epoch": 5.08,
"learning_rate": 6.306687898089172e-05,
"loss": 1.158,
"step": 4500
},
{
"epoch": 5.08,
"eval_loss": 0.3070796728134155,
"eval_runtime": 427.2037,
"eval_samples_per_second": 14.532,
"eval_steps_per_second": 0.908,
"eval_wer": 0.23418685250404028,
"step": 4500
},
{
"epoch": 5.14,
"learning_rate": 6.2828025477707e-05,
"loss": 1.1221,
"step": 4550
},
{
"epoch": 5.2,
"learning_rate": 6.258917197452229e-05,
"loss": 1.1167,
"step": 4600
},
{
"epoch": 5.25,
"learning_rate": 6.235031847133757e-05,
"loss": 1.1067,
"step": 4650
},
{
"epoch": 5.31,
"learning_rate": 6.211146496815286e-05,
"loss": 1.099,
"step": 4700
},
{
"epoch": 5.37,
"learning_rate": 6.187261146496814e-05,
"loss": 1.108,
"step": 4750
},
{
"epoch": 5.37,
"eval_loss": 0.3011206090450287,
"eval_runtime": 430.4576,
"eval_samples_per_second": 14.422,
"eval_steps_per_second": 0.901,
"eval_wer": 0.23136566181210122,
"step": 4750
},
{
"epoch": 5.42,
"learning_rate": 6.163375796178344e-05,
"loss": 1.1024,
"step": 4800
},
{
"epoch": 5.48,
"learning_rate": 6.139490445859872e-05,
"loss": 1.1039,
"step": 4850
},
{
"epoch": 5.54,
"learning_rate": 6.115605095541401e-05,
"loss": 1.1082,
"step": 4900
},
{
"epoch": 5.59,
"learning_rate": 6.09171974522293e-05,
"loss": 1.0982,
"step": 4950
},
{
"epoch": 5.65,
"learning_rate": 6.0678343949044583e-05,
"loss": 1.1025,
"step": 5000
},
{
"epoch": 5.65,
"eval_loss": 0.28753861784935,
"eval_runtime": 431.3779,
"eval_samples_per_second": 14.391,
"eval_steps_per_second": 0.899,
"eval_wer": 0.2289368221435444,
"step": 5000
},
{
"epoch": 5.71,
"learning_rate": 6.043949044585987e-05,
"loss": 1.089,
"step": 5050
},
{
"epoch": 5.76,
"learning_rate": 6.020063694267516e-05,
"loss": 1.0792,
"step": 5100
},
{
"epoch": 5.82,
"learning_rate": 5.9961783439490444e-05,
"loss": 1.1054,
"step": 5150
},
{
"epoch": 5.87,
"learning_rate": 5.972770700636942e-05,
"loss": 1.078,
"step": 5200
},
{
"epoch": 5.93,
"learning_rate": 5.948885350318471e-05,
"loss": 1.0697,
"step": 5250
},
{
"epoch": 5.93,
"eval_loss": 0.29261597990989685,
"eval_runtime": 429.3286,
"eval_samples_per_second": 14.46,
"eval_steps_per_second": 0.904,
"eval_wer": 0.22559249675376236,
"step": 5250
},
{
"epoch": 5.99,
"learning_rate": 5.925e-05,
"loss": 1.1183,
"step": 5300
},
{
"epoch": 6.05,
"learning_rate": 5.9011146496815284e-05,
"loss": 1.1614,
"step": 5350
},
{
"epoch": 6.1,
"learning_rate": 5.877229299363057e-05,
"loss": 1.075,
"step": 5400
},
{
"epoch": 6.16,
"learning_rate": 5.853343949044586e-05,
"loss": 1.0901,
"step": 5450
},
{
"epoch": 6.21,
"learning_rate": 5.8294585987261144e-05,
"loss": 1.0904,
"step": 5500
},
{
"epoch": 6.21,
"eval_loss": 0.2695116698741913,
"eval_runtime": 431.1678,
"eval_samples_per_second": 14.398,
"eval_steps_per_second": 0.9,
"eval_wer": 0.22445281044774726,
"step": 5500
},
{
"epoch": 6.27,
"learning_rate": 5.805573248407643e-05,
"loss": 1.0577,
"step": 5550
},
{
"epoch": 6.33,
"learning_rate": 5.781687898089172e-05,
"loss": 1.0693,
"step": 5600
},
{
"epoch": 6.38,
"learning_rate": 5.7578025477707004e-05,
"loss": 1.0784,
"step": 5650
},
{
"epoch": 6.44,
"learning_rate": 5.733917197452229e-05,
"loss": 1.0754,
"step": 5700
},
{
"epoch": 6.5,
"learning_rate": 5.710031847133758e-05,
"loss": 1.0802,
"step": 5750
},
{
"epoch": 6.5,
"eval_loss": 0.26020729541778564,
"eval_runtime": 433.3184,
"eval_samples_per_second": 14.327,
"eval_steps_per_second": 0.895,
"eval_wer": 0.21889450428316534,
"step": 5750
},
{
"epoch": 6.55,
"learning_rate": 5.6861464968152864e-05,
"loss": 1.0459,
"step": 5800
},
{
"epoch": 6.61,
"learning_rate": 5.662261146496815e-05,
"loss": 1.0492,
"step": 5850
},
{
"epoch": 6.67,
"learning_rate": 5.638375796178344e-05,
"loss": 1.0526,
"step": 5900
},
{
"epoch": 6.72,
"learning_rate": 5.6144904458598724e-05,
"loss": 1.079,
"step": 5950
},
{
"epoch": 6.78,
"learning_rate": 5.590605095541401e-05,
"loss": 1.0882,
"step": 6000
},
{
"epoch": 6.78,
"eval_loss": 0.2602781653404236,
"eval_runtime": 434.4762,
"eval_samples_per_second": 14.288,
"eval_steps_per_second": 0.893,
"eval_wer": 0.21684867394695787,
"step": 6000
},
{
"epoch": 6.84,
"learning_rate": 5.56671974522293e-05,
"loss": 1.0691,
"step": 6050
},
{
"epoch": 6.89,
"learning_rate": 5.5428343949044585e-05,
"loss": 1.0728,
"step": 6100
},
{
"epoch": 6.95,
"learning_rate": 5.518949044585987e-05,
"loss": 1.0308,
"step": 6150
},
{
"epoch": 7.01,
"learning_rate": 5.4955414012738844e-05,
"loss": 1.0894,
"step": 6200
},
{
"epoch": 7.06,
"learning_rate": 5.471656050955413e-05,
"loss": 1.0881,
"step": 6250
},
{
"epoch": 7.06,
"eval_loss": 0.25403761863708496,
"eval_runtime": 433.991,
"eval_samples_per_second": 14.304,
"eval_steps_per_second": 0.894,
"eval_wer": 0.2292544396386634,
"step": 6250
},
{
"epoch": 7.12,
"learning_rate": 5.447770700636942e-05,
"loss": 1.0295,
"step": 6300
},
{
"epoch": 7.17,
"learning_rate": 5.4238853503184704e-05,
"loss": 1.0389,
"step": 6350
},
{
"epoch": 7.23,
"learning_rate": 5.399999999999999e-05,
"loss": 1.0415,
"step": 6400
},
{
"epoch": 7.29,
"learning_rate": 5.376114649681528e-05,
"loss": 1.0492,
"step": 6450
},
{
"epoch": 7.34,
"learning_rate": 5.3522292993630565e-05,
"loss": 1.0378,
"step": 6500
},
{
"epoch": 7.34,
"eval_loss": 0.2614484429359436,
"eval_runtime": 432.0675,
"eval_samples_per_second": 14.368,
"eval_steps_per_second": 0.898,
"eval_wer": 0.21932422207067923,
"step": 6500
},
{
"epoch": 7.4,
"learning_rate": 5.328343949044585e-05,
"loss": 1.0362,
"step": 6550
},
{
"epoch": 7.46,
"learning_rate": 5.304458598726114e-05,
"loss": 1.0444,
"step": 6600
},
{
"epoch": 7.51,
"learning_rate": 5.2805732484076425e-05,
"loss": 1.0626,
"step": 6650
},
{
"epoch": 7.57,
"learning_rate": 5.256687898089171e-05,
"loss": 1.0307,
"step": 6700
},
{
"epoch": 7.63,
"learning_rate": 5.2328025477707e-05,
"loss": 1.0397,
"step": 6750
},
{
"epoch": 7.63,
"eval_loss": 0.27073222398757935,
"eval_runtime": 432.0598,
"eval_samples_per_second": 14.368,
"eval_steps_per_second": 0.898,
"eval_wer": 0.21041224882528237,
"step": 6750
},
{
"epoch": 7.68,
"learning_rate": 5.2089171974522285e-05,
"loss": 1.0481,
"step": 6800
},
{
"epoch": 7.74,
"learning_rate": 5.185031847133757e-05,
"loss": 1.042,
"step": 6850
},
{
"epoch": 7.8,
"learning_rate": 5.161146496815286e-05,
"loss": 1.0298,
"step": 6900
},
{
"epoch": 7.85,
"learning_rate": 5.1372611464968145e-05,
"loss": 1.0269,
"step": 6950
},
{
"epoch": 7.91,
"learning_rate": 5.113375796178343e-05,
"loss": 1.0296,
"step": 7000
},
{
"epoch": 7.91,
"eval_loss": 0.248311385512352,
"eval_runtime": 431.8203,
"eval_samples_per_second": 14.376,
"eval_steps_per_second": 0.899,
"eval_wer": 0.2119256027726139,
"step": 7000
},
{
"epoch": 7.97,
"learning_rate": 5.089490445859872e-05,
"loss": 1.0276,
"step": 7050
},
{
"epoch": 8.02,
"learning_rate": 5.0656050955414005e-05,
"loss": 1.0481,
"step": 7100
},
{
"epoch": 8.08,
"learning_rate": 5.041719745222929e-05,
"loss": 1.006,
"step": 7150
},
{
"epoch": 8.14,
"learning_rate": 5.017834394904458e-05,
"loss": 1.0215,
"step": 7200
},
{
"epoch": 8.19,
"learning_rate": 4.9939490445859866e-05,
"loss": 1.0249,
"step": 7250
},
{
"epoch": 8.19,
"eval_loss": 0.24828839302062988,
"eval_runtime": 429.7696,
"eval_samples_per_second": 14.445,
"eval_steps_per_second": 0.903,
"eval_wer": 0.20468579222210806,
"step": 7250
},
{
"epoch": 8.25,
"learning_rate": 4.970063694267515e-05,
"loss": 1.0109,
"step": 7300
},
{
"epoch": 8.3,
"learning_rate": 4.946178343949044e-05,
"loss": 1.0154,
"step": 7350
},
{
"epoch": 8.36,
"learning_rate": 4.9222929936305726e-05,
"loss": 1.0123,
"step": 7400
},
{
"epoch": 8.42,
"learning_rate": 4.898407643312101e-05,
"loss": 1.0126,
"step": 7450
},
{
"epoch": 8.47,
"learning_rate": 4.87452229299363e-05,
"loss": 1.013,
"step": 7500
},
{
"epoch": 8.47,
"eval_loss": 0.24869437515735626,
"eval_runtime": 430.836,
"eval_samples_per_second": 14.409,
"eval_steps_per_second": 0.901,
"eval_wer": 0.20419068259736378,
"step": 7500
},
{
"epoch": 8.53,
"learning_rate": 4.8506369426751586e-05,
"loss": 1.0077,
"step": 7550
},
{
"epoch": 8.59,
"learning_rate": 4.826751592356687e-05,
"loss": 1.0256,
"step": 7600
},
{
"epoch": 8.64,
"learning_rate": 4.802866242038216e-05,
"loss": 1.0627,
"step": 7650
},
{
"epoch": 8.7,
"learning_rate": 4.7789808917197446e-05,
"loss": 0.9883,
"step": 7700
},
{
"epoch": 8.76,
"learning_rate": 4.755095541401273e-05,
"loss": 1.0064,
"step": 7750
},
{
"epoch": 8.76,
"eval_loss": 0.24558775126934052,
"eval_runtime": 430.9773,
"eval_samples_per_second": 14.404,
"eval_steps_per_second": 0.9,
"eval_wer": 0.20164974263641205,
"step": 7750
},
{
"epoch": 8.81,
"learning_rate": 4.731210191082802e-05,
"loss": 1.0137,
"step": 7800
},
{
"epoch": 8.87,
"learning_rate": 4.7073248407643306e-05,
"loss": 1.0178,
"step": 7850
},
{
"epoch": 8.93,
"learning_rate": 4.683439490445859e-05,
"loss": 1.0035,
"step": 7900
},
{
"epoch": 8.98,
"learning_rate": 4.659554140127388e-05,
"loss": 1.0457,
"step": 7950
},
{
"epoch": 9.04,
"learning_rate": 4.6356687898089167e-05,
"loss": 1.0668,
"step": 8000
},
{
"epoch": 9.04,
"eval_loss": 0.2397284209728241,
"eval_runtime": 430.6925,
"eval_samples_per_second": 14.414,
"eval_steps_per_second": 0.901,
"eval_wer": 0.19949181200780966,
"step": 8000
},
{
"epoch": 9.1,
"learning_rate": 4.611783439490445e-05,
"loss": 1.0054,
"step": 8050
},
{
"epoch": 9.15,
"learning_rate": 4.587898089171974e-05,
"loss": 1.0224,
"step": 8100
},
{
"epoch": 9.21,
"learning_rate": 4.564012738853503e-05,
"loss": 1.0019,
"step": 8150
},
{
"epoch": 9.27,
"learning_rate": 4.5401273885350314e-05,
"loss": 1.0033,
"step": 8200
},
{
"epoch": 9.32,
"learning_rate": 4.51624203821656e-05,
"loss": 1.0129,
"step": 8250
},
{
"epoch": 9.32,
"eval_loss": 0.23742474615573883,
"eval_runtime": 432.9935,
"eval_samples_per_second": 14.337,
"eval_steps_per_second": 0.896,
"eval_wer": 0.19942642017057927,
"step": 8250
},
{
"epoch": 9.38,
"learning_rate": 4.492356687898089e-05,
"loss": 0.9864,
"step": 8300
},
{
"epoch": 9.43,
"learning_rate": 4.4689490445859874e-05,
"loss": 1.0021,
"step": 8350
},
{
"epoch": 9.49,
"learning_rate": 4.445063694267516e-05,
"loss": 1.0073,
"step": 8400
},
{
"epoch": 9.55,
"learning_rate": 4.421178343949045e-05,
"loss": 0.9999,
"step": 8450
},
{
"epoch": 9.6,
"learning_rate": 4.3972929936305734e-05,
"loss": 1.0164,
"step": 8500
},
{
"epoch": 9.6,
"eval_loss": 0.2206413298845291,
"eval_runtime": 431.5354,
"eval_samples_per_second": 14.386,
"eval_steps_per_second": 0.899,
"eval_wer": 0.19915551113062488,
"step": 8500
},
{
"epoch": 9.66,
"learning_rate": 4.373407643312102e-05,
"loss": 0.9956,
"step": 8550
},
{
"epoch": 9.72,
"learning_rate": 4.349522292993631e-05,
"loss": 0.9662,
"step": 8600
},
{
"epoch": 9.77,
"learning_rate": 4.3256369426751594e-05,
"loss": 0.9781,
"step": 8650
},
{
"epoch": 9.83,
"learning_rate": 4.301751592356688e-05,
"loss": 0.9863,
"step": 8700
},
{
"epoch": 9.89,
"learning_rate": 4.277866242038217e-05,
"loss": 0.975,
"step": 8750
},
{
"epoch": 9.89,
"eval_loss": 0.22473624348640442,
"eval_runtime": 432.1534,
"eval_samples_per_second": 14.365,
"eval_steps_per_second": 0.898,
"eval_wer": 0.19731519799714145,
"step": 8750
},
{
"epoch": 9.94,
"learning_rate": 4.2539808917197454e-05,
"loss": 0.9931,
"step": 8800
},
{
"epoch": 10.0,
"learning_rate": 4.230095541401274e-05,
"loss": 1.0101,
"step": 8850
},
{
"epoch": 10.06,
"learning_rate": 4.206210191082803e-05,
"loss": 1.0034,
"step": 8900
},
{
"epoch": 10.11,
"learning_rate": 4.1823248407643314e-05,
"loss": 1.0018,
"step": 8950
},
{
"epoch": 10.17,
"learning_rate": 4.15843949044586e-05,
"loss": 0.9849,
"step": 9000
},
{
"epoch": 10.17,
"eval_loss": 0.23245184123516083,
"eval_runtime": 431.4778,
"eval_samples_per_second": 14.388,
"eval_steps_per_second": 0.899,
"eval_wer": 0.19526002596990108,
"step": 9000
},
{
"epoch": 10.23,
"learning_rate": 4.134554140127389e-05,
"loss": 0.9953,
"step": 9050
},
{
"epoch": 10.28,
"learning_rate": 4.1106687898089175e-05,
"loss": 0.9639,
"step": 9100
},
{
"epoch": 10.34,
"learning_rate": 4.086783439490446e-05,
"loss": 0.9862,
"step": 9150
},
{
"epoch": 10.4,
"learning_rate": 4.062898089171975e-05,
"loss": 1.0222,
"step": 9200
},
{
"epoch": 10.45,
"learning_rate": 4.0390127388535035e-05,
"loss": 0.9826,
"step": 9250
},
{
"epoch": 10.45,
"eval_loss": 0.2301308959722519,
"eval_runtime": 432.6762,
"eval_samples_per_second": 14.348,
"eval_steps_per_second": 0.897,
"eval_wer": 0.1933730043812531,
"step": 9250
},
{
"epoch": 10.51,
"learning_rate": 4.015127388535032e-05,
"loss": 0.9867,
"step": 9300
},
{
"epoch": 10.56,
"learning_rate": 3.991242038216561e-05,
"loss": 0.9687,
"step": 9350
},
{
"epoch": 10.62,
"learning_rate": 3.9673566878980895e-05,
"loss": 0.9715,
"step": 9400
},
{
"epoch": 10.68,
"learning_rate": 3.943471337579618e-05,
"loss": 0.9914,
"step": 9450
},
{
"epoch": 10.73,
"learning_rate": 3.919585987261147e-05,
"loss": 0.9835,
"step": 9500
},
{
"epoch": 10.73,
"eval_loss": 0.2191852629184723,
"eval_runtime": 439.1976,
"eval_samples_per_second": 14.135,
"eval_steps_per_second": 0.883,
"eval_wer": 0.19420441488318216,
"step": 9500
},
{
"epoch": 10.79,
"learning_rate": 3.8957006369426755e-05,
"loss": 0.9652,
"step": 9550
},
{
"epoch": 10.85,
"learning_rate": 3.871815286624204e-05,
"loss": 0.9614,
"step": 9600
},
{
"epoch": 10.9,
"learning_rate": 3.847929936305733e-05,
"loss": 0.97,
"step": 9650
},
{
"epoch": 10.96,
"learning_rate": 3.8240445859872615e-05,
"loss": 0.9764,
"step": 9700
},
{
"epoch": 11.02,
"learning_rate": 3.80015923566879e-05,
"loss": 0.9676,
"step": 9750
},
{
"epoch": 11.02,
"eval_loss": 0.2265927493572235,
"eval_runtime": 430.7748,
"eval_samples_per_second": 14.411,
"eval_steps_per_second": 0.901,
"eval_wer": 0.19133651573607854,
"step": 9750
},
{
"epoch": 11.07,
"learning_rate": 3.776273885350319e-05,
"loss": 0.9609,
"step": 9800
},
{
"epoch": 11.13,
"learning_rate": 3.7523885350318475e-05,
"loss": 0.9721,
"step": 9850
},
{
"epoch": 11.19,
"learning_rate": 3.7285031847133755e-05,
"loss": 0.9669,
"step": 9900
},
{
"epoch": 11.24,
"learning_rate": 3.704617834394904e-05,
"loss": 0.9643,
"step": 9950
},
{
"epoch": 11.3,
"learning_rate": 3.680732484076433e-05,
"loss": 0.9627,
"step": 10000
},
{
"epoch": 11.3,
"eval_loss": 0.2193416953086853,
"eval_runtime": 432.6083,
"eval_samples_per_second": 14.35,
"eval_steps_per_second": 0.897,
"eval_wer": 0.19205582594561268,
"step": 10000
},
{
"epoch": 11.36,
"learning_rate": 3.6568471337579616e-05,
"loss": 1.0179,
"step": 10050
},
{
"epoch": 11.41,
"learning_rate": 3.63296178343949e-05,
"loss": 0.9575,
"step": 10100
},
{
"epoch": 11.47,
"learning_rate": 3.609076433121019e-05,
"loss": 0.98,
"step": 10150
},
{
"epoch": 11.52,
"learning_rate": 3.5851910828025476e-05,
"loss": 0.9542,
"step": 10200
},
{
"epoch": 11.58,
"learning_rate": 3.561305732484076e-05,
"loss": 0.976,
"step": 10250
},
{
"epoch": 11.58,
"eval_loss": 0.23090308904647827,
"eval_runtime": 432.1501,
"eval_samples_per_second": 14.365,
"eval_steps_per_second": 0.898,
"eval_wer": 0.1881790241669547,
"step": 10250
},
{
"epoch": 11.64,
"learning_rate": 3.537420382165605e-05,
"loss": 0.972,
"step": 10300
},
{
"epoch": 11.69,
"learning_rate": 3.5135350318471336e-05,
"loss": 0.9634,
"step": 10350
},
{
"epoch": 11.75,
"learning_rate": 3.489649681528662e-05,
"loss": 0.9682,
"step": 10400
},
{
"epoch": 11.81,
"learning_rate": 3.465764331210191e-05,
"loss": 0.9638,
"step": 10450
},
{
"epoch": 11.86,
"learning_rate": 3.4418789808917196e-05,
"loss": 0.969,
"step": 10500
},
{
"epoch": 11.86,
"eval_loss": 0.2268366813659668,
"eval_runtime": 433.2795,
"eval_samples_per_second": 14.328,
"eval_steps_per_second": 0.895,
"eval_wer": 0.18859005857240277,
"step": 10500
},
{
"epoch": 11.92,
"learning_rate": 3.417993630573248e-05,
"loss": 0.9698,
"step": 10550
},
{
"epoch": 11.98,
"learning_rate": 3.394108280254777e-05,
"loss": 0.9369,
"step": 10600
},
{
"epoch": 12.03,
"learning_rate": 3.3702229299363056e-05,
"loss": 0.9699,
"step": 10650
},
{
"epoch": 12.09,
"learning_rate": 3.346337579617834e-05,
"loss": 1.0013,
"step": 10700
},
{
"epoch": 12.15,
"learning_rate": 3.322929936305732e-05,
"loss": 0.9611,
"step": 10750
},
{
"epoch": 12.15,
"eval_loss": 0.2322191596031189,
"eval_runtime": 429.3587,
"eval_samples_per_second": 14.459,
"eval_steps_per_second": 0.904,
"eval_wer": 0.18626397750520798,
"step": 10750
},
{
"epoch": 12.2,
"learning_rate": 3.299044585987261e-05,
"loss": 0.9418,
"step": 10800
},
{
"epoch": 12.26,
"learning_rate": 3.2751592356687896e-05,
"loss": 0.9582,
"step": 10850
},
{
"epoch": 12.32,
"learning_rate": 3.251273885350318e-05,
"loss": 0.945,
"step": 10900
},
{
"epoch": 12.37,
"learning_rate": 3.227388535031847e-05,
"loss": 0.9386,
"step": 10950
},
{
"epoch": 12.43,
"learning_rate": 3.2035031847133757e-05,
"loss": 0.9397,
"step": 11000
},
{
"epoch": 12.43,
"eval_loss": 0.21969455480575562,
"eval_runtime": 432.2628,
"eval_samples_per_second": 14.362,
"eval_steps_per_second": 0.898,
"eval_wer": 0.1843676142255271,
"step": 11000
},
{
"epoch": 12.49,
"learning_rate": 3.179617834394904e-05,
"loss": 0.9594,
"step": 11050
},
{
"epoch": 12.54,
"learning_rate": 3.155732484076433e-05,
"loss": 0.9467,
"step": 11100
},
{
"epoch": 12.6,
"learning_rate": 3.131847133757962e-05,
"loss": 0.9609,
"step": 11150
},
{
"epoch": 12.65,
"learning_rate": 3.1079617834394904e-05,
"loss": 0.9446,
"step": 11200
},
{
"epoch": 12.71,
"learning_rate": 3.084076433121019e-05,
"loss": 0.9601,
"step": 11250
},
{
"epoch": 12.71,
"eval_loss": 0.22107979655265808,
"eval_runtime": 432.9535,
"eval_samples_per_second": 14.339,
"eval_steps_per_second": 0.896,
"eval_wer": 0.18711407138920289,
"step": 11250
},
{
"epoch": 12.77,
"learning_rate": 3.060191082802548e-05,
"loss": 0.9497,
"step": 11300
},
{
"epoch": 12.82,
"learning_rate": 3.036305732484076e-05,
"loss": 0.939,
"step": 11350
},
{
"epoch": 12.88,
"learning_rate": 3.0124203821656047e-05,
"loss": 0.9462,
"step": 11400
},
{
"epoch": 12.94,
"learning_rate": 2.9885350318471334e-05,
"loss": 0.9243,
"step": 11450
},
{
"epoch": 12.99,
"learning_rate": 2.964649681528662e-05,
"loss": 0.9718,
"step": 11500
},
{
"epoch": 12.99,
"eval_loss": 0.20792651176452637,
"eval_runtime": 429.7801,
"eval_samples_per_second": 14.445,
"eval_steps_per_second": 0.903,
"eval_wer": 0.189823161788747,
"step": 11500
},
{
"epoch": 13.05,
"learning_rate": 2.9407643312101907e-05,
"loss": 0.9543,
"step": 11550
},
{
"epoch": 13.11,
"learning_rate": 2.9168789808917194e-05,
"loss": 0.9386,
"step": 11600
},
{
"epoch": 13.16,
"learning_rate": 2.892993630573248e-05,
"loss": 0.9662,
"step": 11650
},
{
"epoch": 13.22,
"learning_rate": 2.8691082802547767e-05,
"loss": 0.9426,
"step": 11700
},
{
"epoch": 13.28,
"learning_rate": 2.8452229299363054e-05,
"loss": 0.9347,
"step": 11750
},
{
"epoch": 13.28,
"eval_loss": 0.2053879350423813,
"eval_runtime": 427.5266,
"eval_samples_per_second": 14.521,
"eval_steps_per_second": 0.908,
"eval_wer": 0.1842835390062309,
"step": 11750
},
{
"epoch": 13.33,
"learning_rate": 2.821337579617834e-05,
"loss": 0.9579,
"step": 11800
},
{
"epoch": 13.39,
"learning_rate": 2.7974522292993628e-05,
"loss": 0.9313,
"step": 11850
},
{
"epoch": 13.45,
"learning_rate": 2.7735668789808914e-05,
"loss": 0.9295,
"step": 11900
},
{
"epoch": 13.5,
"learning_rate": 2.74968152866242e-05,
"loss": 0.9437,
"step": 11950
},
{
"epoch": 13.56,
"learning_rate": 2.7257961783439488e-05,
"loss": 0.9377,
"step": 12000
},
{
"epoch": 13.56,
"eval_loss": 0.20305366814136505,
"eval_runtime": 429.8935,
"eval_samples_per_second": 14.441,
"eval_steps_per_second": 0.903,
"eval_wer": 0.18423683055106635,
"step": 12000
},
{
"epoch": 13.62,
"learning_rate": 2.7019108280254775e-05,
"loss": 0.9273,
"step": 12050
},
{
"epoch": 13.67,
"learning_rate": 2.678025477707006e-05,
"loss": 0.9804,
"step": 12100
},
{
"epoch": 13.73,
"learning_rate": 2.6541401273885348e-05,
"loss": 0.9392,
"step": 12150
},
{
"epoch": 13.78,
"learning_rate": 2.6302547770700635e-05,
"loss": 0.9379,
"step": 12200
},
{
"epoch": 13.84,
"learning_rate": 2.606369426751592e-05,
"loss": 0.934,
"step": 12250
},
{
"epoch": 13.84,
"eval_loss": 0.20586800575256348,
"eval_runtime": 428.3313,
"eval_samples_per_second": 14.493,
"eval_steps_per_second": 0.906,
"eval_wer": 0.18060291273926407,
"step": 12250
},
{
"epoch": 13.9,
"learning_rate": 2.5824840764331208e-05,
"loss": 0.9177,
"step": 12300
},
{
"epoch": 13.95,
"learning_rate": 2.5585987261146495e-05,
"loss": 0.9369,
"step": 12350
},
{
"epoch": 14.01,
"learning_rate": 2.534713375796178e-05,
"loss": 0.9438,
"step": 12400
},
{
"epoch": 14.07,
"learning_rate": 2.510828025477707e-05,
"loss": 0.9341,
"step": 12450
},
{
"epoch": 14.12,
"learning_rate": 2.4869426751592355e-05,
"loss": 0.9295,
"step": 12500
},
{
"epoch": 14.12,
"eval_loss": 0.21221554279327393,
"eval_runtime": 432.7246,
"eval_samples_per_second": 14.346,
"eval_steps_per_second": 0.897,
"eval_wer": 0.18605846030248396,
"step": 12500
},
{
"epoch": 14.18,
"learning_rate": 2.4630573248407642e-05,
"loss": 0.9239,
"step": 12550
},
{
"epoch": 14.24,
"learning_rate": 2.439171974522293e-05,
"loss": 0.9235,
"step": 12600
},
{
"epoch": 14.29,
"learning_rate": 2.4152866242038215e-05,
"loss": 0.9631,
"step": 12650
},
{
"epoch": 14.35,
"learning_rate": 2.3914012738853502e-05,
"loss": 0.9467,
"step": 12700
},
{
"epoch": 14.41,
"learning_rate": 2.367515923566879e-05,
"loss": 0.935,
"step": 12750
},
{
"epoch": 14.41,
"eval_loss": 0.20723822712898254,
"eval_runtime": 429.493,
"eval_samples_per_second": 14.454,
"eval_steps_per_second": 0.903,
"eval_wer": 0.17866918269545154,
"step": 12750
},
{
"epoch": 14.46,
"learning_rate": 2.3436305732484076e-05,
"loss": 0.9319,
"step": 12800
},
{
"epoch": 14.52,
"learning_rate": 2.3197452229299362e-05,
"loss": 0.9337,
"step": 12850
},
{
"epoch": 14.58,
"learning_rate": 2.295859872611465e-05,
"loss": 0.9259,
"step": 12900
},
{
"epoch": 14.63,
"learning_rate": 2.2719745222929936e-05,
"loss": 0.9228,
"step": 12950
},
{
"epoch": 14.69,
"learning_rate": 2.2480891719745222e-05,
"loss": 0.9021,
"step": 13000
},
{
"epoch": 14.69,
"eval_loss": 0.21045178174972534,
"eval_runtime": 428.8167,
"eval_samples_per_second": 14.477,
"eval_steps_per_second": 0.905,
"eval_wer": 0.1781273646155427,
"step": 13000
},
{
"epoch": 14.75,
"learning_rate": 2.224203821656051e-05,
"loss": 0.9238,
"step": 13050
},
{
"epoch": 14.8,
"learning_rate": 2.2003184713375796e-05,
"loss": 0.9373,
"step": 13100
},
{
"epoch": 14.86,
"learning_rate": 2.1764331210191083e-05,
"loss": 0.9365,
"step": 13150
},
{
"epoch": 14.91,
"learning_rate": 2.152547770700637e-05,
"loss": 0.9656,
"step": 13200
},
{
"epoch": 14.97,
"learning_rate": 2.1286624203821656e-05,
"loss": 0.9193,
"step": 13250
},
{
"epoch": 14.97,
"eval_loss": 0.20348267257213593,
"eval_runtime": 430.2042,
"eval_samples_per_second": 14.43,
"eval_steps_per_second": 0.902,
"eval_wer": 0.17860379085822115,
"step": 13250
},
{
"epoch": 15.03,
"learning_rate": 2.1047770700636943e-05,
"loss": 0.9366,
"step": 13300
},
{
"epoch": 15.08,
"learning_rate": 2.080891719745223e-05,
"loss": 0.9129,
"step": 13350
},
{
"epoch": 15.14,
"learning_rate": 2.0570063694267513e-05,
"loss": 0.9032,
"step": 13400
},
{
"epoch": 15.2,
"learning_rate": 2.03312101910828e-05,
"loss": 0.9152,
"step": 13450
},
{
"epoch": 15.25,
"learning_rate": 2.0092356687898086e-05,
"loss": 0.9214,
"step": 13500
},
{
"epoch": 15.25,
"eval_loss": 0.2034832239151001,
"eval_runtime": 432.039,
"eval_samples_per_second": 14.369,
"eval_steps_per_second": 0.898,
"eval_wer": 0.17661401066821117,
"step": 13500
},
{
"epoch": 15.31,
"learning_rate": 1.9853503184713373e-05,
"loss": 0.9438,
"step": 13550
},
{
"epoch": 15.37,
"learning_rate": 1.961464968152866e-05,
"loss": 0.9262,
"step": 13600
},
{
"epoch": 15.42,
"learning_rate": 1.9375796178343947e-05,
"loss": 0.9157,
"step": 13650
},
{
"epoch": 15.48,
"learning_rate": 1.9136942675159233e-05,
"loss": 0.9299,
"step": 13700
},
{
"epoch": 15.54,
"learning_rate": 1.889808917197452e-05,
"loss": 0.9048,
"step": 13750
},
{
"epoch": 15.54,
"eval_loss": 0.19639889895915985,
"eval_runtime": 438.8483,
"eval_samples_per_second": 14.146,
"eval_steps_per_second": 0.884,
"eval_wer": 0.17581062523938082,
"step": 13750
},
{
"epoch": 15.59,
"learning_rate": 1.8659235668789807e-05,
"loss": 0.9399,
"step": 13800
},
{
"epoch": 15.65,
"learning_rate": 1.8420382165605094e-05,
"loss": 0.9309,
"step": 13850
},
{
"epoch": 15.71,
"learning_rate": 1.818152866242038e-05,
"loss": 0.9646,
"step": 13900
},
{
"epoch": 15.76,
"learning_rate": 1.7942675159235667e-05,
"loss": 0.9095,
"step": 13950
},
{
"epoch": 15.82,
"learning_rate": 1.7703821656050954e-05,
"loss": 0.9006,
"step": 14000
},
{
"epoch": 15.82,
"eval_loss": 0.19844159483909607,
"eval_runtime": 435.4721,
"eval_samples_per_second": 14.256,
"eval_steps_per_second": 0.891,
"eval_wer": 0.17574523340215045,
"step": 14000
},
{
"epoch": 15.87,
"learning_rate": 1.746496815286624e-05,
"loss": 0.8845,
"step": 14050
},
{
"epoch": 15.93,
"learning_rate": 1.7226114649681527e-05,
"loss": 0.8991,
"step": 14100
},
{
"epoch": 15.99,
"learning_rate": 1.6987261146496814e-05,
"loss": 0.9266,
"step": 14150
},
{
"epoch": 16.05,
"learning_rate": 1.67484076433121e-05,
"loss": 0.9535,
"step": 14200
},
{
"epoch": 16.1,
"learning_rate": 1.6509554140127387e-05,
"loss": 0.9027,
"step": 14250
},
{
"epoch": 16.1,
"eval_loss": 0.20223206281661987,
"eval_runtime": 434.737,
"eval_samples_per_second": 14.28,
"eval_steps_per_second": 0.892,
"eval_wer": 0.17431595467411512,
"step": 14250
},
{
"epoch": 16.16,
"learning_rate": 1.6270700636942674e-05,
"loss": 0.9095,
"step": 14300
},
{
"epoch": 16.21,
"learning_rate": 1.603184713375796e-05,
"loss": 0.9024,
"step": 14350
},
{
"epoch": 16.27,
"learning_rate": 1.5792993630573248e-05,
"loss": 0.9135,
"step": 14400
},
{
"epoch": 16.33,
"learning_rate": 1.5554140127388534e-05,
"loss": 0.9013,
"step": 14450
},
{
"epoch": 16.38,
"learning_rate": 1.531528662420382e-05,
"loss": 0.9083,
"step": 14500
},
{
"epoch": 16.38,
"eval_loss": 0.19693595170974731,
"eval_runtime": 437.2683,
"eval_samples_per_second": 14.197,
"eval_steps_per_second": 0.887,
"eval_wer": 0.1744000298934113,
"step": 14500
},
{
"epoch": 16.44,
"learning_rate": 1.5076433121019106e-05,
"loss": 0.9173,
"step": 14550
},
{
"epoch": 16.5,
"learning_rate": 1.4837579617834393e-05,
"loss": 0.9133,
"step": 14600
},
{
"epoch": 16.55,
"learning_rate": 1.459872611464968e-05,
"loss": 0.9161,
"step": 14650
},
{
"epoch": 16.61,
"learning_rate": 1.4359872611464966e-05,
"loss": 0.8844,
"step": 14700
},
{
"epoch": 16.67,
"learning_rate": 1.4121019108280253e-05,
"loss": 0.9761,
"step": 14750
},
{
"epoch": 16.67,
"eval_loss": 0.19631367921829224,
"eval_runtime": 434.2237,
"eval_samples_per_second": 14.297,
"eval_steps_per_second": 0.894,
"eval_wer": 0.17276523396265192,
"step": 14750
},
{
"epoch": 16.72,
"learning_rate": 1.388216560509554e-05,
"loss": 0.9057,
"step": 14800
},
{
"epoch": 16.78,
"learning_rate": 1.3643312101910826e-05,
"loss": 0.9128,
"step": 14850
},
{
"epoch": 16.84,
"learning_rate": 1.3404458598726113e-05,
"loss": 0.9056,
"step": 14900
},
{
"epoch": 16.89,
"learning_rate": 1.31656050955414e-05,
"loss": 0.9024,
"step": 14950
},
{
"epoch": 16.95,
"learning_rate": 1.2926751592356687e-05,
"loss": 0.9311,
"step": 15000
},
{
"epoch": 16.95,
"eval_loss": 0.19600756466388702,
"eval_runtime": 438.9128,
"eval_samples_per_second": 14.144,
"eval_steps_per_second": 0.884,
"eval_wer": 0.1736807196838772,
"step": 15000
},
{
"epoch": 17.01,
"learning_rate": 1.2687898089171973e-05,
"loss": 0.9372,
"step": 15050
},
{
"epoch": 17.06,
"learning_rate": 1.244904458598726e-05,
"loss": 0.8955,
"step": 15100
},
{
"epoch": 17.12,
"learning_rate": 1.2210191082802547e-05,
"loss": 0.909,
"step": 15150
},
{
"epoch": 17.17,
"learning_rate": 1.1971337579617834e-05,
"loss": 0.9092,
"step": 15200
},
{
"epoch": 17.23,
"learning_rate": 1.173248407643312e-05,
"loss": 0.886,
"step": 15250
},
{
"epoch": 17.23,
"eval_loss": 0.1928754597902298,
"eval_runtime": 438.297,
"eval_samples_per_second": 14.164,
"eval_steps_per_second": 0.885,
"eval_wer": 0.17263445028819116,
"step": 15250
},
{
"epoch": 17.29,
"learning_rate": 1.1493630573248407e-05,
"loss": 0.9053,
"step": 15300
},
{
"epoch": 17.34,
"learning_rate": 1.1254777070063694e-05,
"loss": 0.9056,
"step": 15350
},
{
"epoch": 17.4,
"learning_rate": 1.101592356687898e-05,
"loss": 0.9219,
"step": 15400
},
{
"epoch": 17.46,
"learning_rate": 1.0777070063694267e-05,
"loss": 0.8967,
"step": 15450
},
{
"epoch": 17.51,
"learning_rate": 1.0538216560509554e-05,
"loss": 0.8969,
"step": 15500
},
{
"epoch": 17.51,
"eval_loss": 0.1928360015153885,
"eval_runtime": 442.1109,
"eval_samples_per_second": 14.042,
"eval_steps_per_second": 0.878,
"eval_wer": 0.17337244387979112,
"step": 15500
},
{
"epoch": 17.57,
"learning_rate": 1.029936305732484e-05,
"loss": 0.8899,
"step": 15550
},
{
"epoch": 17.63,
"learning_rate": 1.0060509554140127e-05,
"loss": 0.9056,
"step": 15600
},
{
"epoch": 17.68,
"learning_rate": 9.821656050955414e-06,
"loss": 0.9048,
"step": 15650
},
{
"epoch": 17.74,
"learning_rate": 9.582802547770701e-06,
"loss": 0.9572,
"step": 15700
},
{
"epoch": 17.8,
"learning_rate": 9.34872611464968e-06,
"loss": 0.9084,
"step": 15750
},
{
"epoch": 17.8,
"eval_loss": 0.19373278319835663,
"eval_runtime": 446.3693,
"eval_samples_per_second": 13.908,
"eval_steps_per_second": 0.869,
"eval_wer": 0.17133595523461656,
"step": 15750
},
{
"epoch": 17.85,
"learning_rate": 9.109872611464967e-06,
"loss": 0.8861,
"step": 15800
},
{
"epoch": 17.91,
"learning_rate": 8.871019108280254e-06,
"loss": 0.8842,
"step": 15850
},
{
"epoch": 17.97,
"learning_rate": 8.63216560509554e-06,
"loss": 0.8949,
"step": 15900
},
{
"epoch": 18.02,
"learning_rate": 8.398089171974522e-06,
"loss": 0.8977,
"step": 15950
},
{
"epoch": 18.08,
"learning_rate": 8.159235668789809e-06,
"loss": 0.8795,
"step": 16000
},
{
"epoch": 18.08,
"eval_loss": 0.1977699100971222,
"eval_runtime": 437.7611,
"eval_samples_per_second": 14.181,
"eval_steps_per_second": 0.886,
"eval_wer": 0.17086887068297102,
"step": 16000
},
{
"epoch": 18.14,
"learning_rate": 7.920382165605094e-06,
"loss": 0.8984,
"step": 16050
},
{
"epoch": 18.19,
"learning_rate": 7.68152866242038e-06,
"loss": 0.9005,
"step": 16100
},
{
"epoch": 18.25,
"learning_rate": 7.4426751592356675e-06,
"loss": 0.8981,
"step": 16150
},
{
"epoch": 18.3,
"learning_rate": 7.203821656050954e-06,
"loss": 0.9029,
"step": 16200
},
{
"epoch": 18.36,
"learning_rate": 6.964968152866241e-06,
"loss": 0.8883,
"step": 16250
},
{
"epoch": 18.36,
"eval_loss": 0.19563348591327667,
"eval_runtime": 434.7761,
"eval_samples_per_second": 14.279,
"eval_steps_per_second": 0.892,
"eval_wer": 0.17032705260306222,
"step": 16250
},
{
"epoch": 18.42,
"learning_rate": 6.726114649681528e-06,
"loss": 0.8919,
"step": 16300
},
{
"epoch": 18.47,
"learning_rate": 6.4872611464968145e-06,
"loss": 0.8978,
"step": 16350
},
{
"epoch": 18.53,
"learning_rate": 6.248407643312101e-06,
"loss": 0.8897,
"step": 16400
},
{
"epoch": 18.59,
"learning_rate": 6.009554140127388e-06,
"loss": 0.9477,
"step": 16450
},
{
"epoch": 18.64,
"learning_rate": 5.770700636942675e-06,
"loss": 0.8901,
"step": 16500
},
{
"epoch": 18.64,
"eval_loss": 0.19332656264305115,
"eval_runtime": 439.133,
"eval_samples_per_second": 14.137,
"eval_steps_per_second": 0.884,
"eval_wer": 0.17053256980578624,
"step": 16500
},
{
"epoch": 18.7,
"learning_rate": 5.531847133757961e-06,
"loss": 0.8992,
"step": 16550
},
{
"epoch": 18.76,
"learning_rate": 5.292993630573248e-06,
"loss": 0.8988,
"step": 16600
},
{
"epoch": 18.81,
"learning_rate": 5.054140127388535e-06,
"loss": 0.8885,
"step": 16650
},
{
"epoch": 18.87,
"learning_rate": 4.815286624203822e-06,
"loss": 0.8837,
"step": 16700
},
{
"epoch": 18.93,
"learning_rate": 4.576433121019108e-06,
"loss": 0.8922,
"step": 16750
},
{
"epoch": 18.93,
"eval_loss": 0.1962379515171051,
"eval_runtime": 444.5287,
"eval_samples_per_second": 13.965,
"eval_steps_per_second": 0.873,
"eval_wer": 0.17109307126776088,
"step": 16750
},
{
"epoch": 18.98,
"learning_rate": 4.337579617834394e-06,
"loss": 0.8943,
"step": 16800
},
{
"epoch": 19.04,
"learning_rate": 4.098726114649681e-06,
"loss": 0.9171,
"step": 16850
},
{
"epoch": 19.1,
"learning_rate": 3.859872611464968e-06,
"loss": 0.9144,
"step": 16900
},
{
"epoch": 19.15,
"learning_rate": 3.6210191082802544e-06,
"loss": 0.9517,
"step": 16950
},
{
"epoch": 19.21,
"learning_rate": 3.382165605095541e-06,
"loss": 0.8765,
"step": 17000
},
{
"epoch": 19.21,
"eval_loss": 0.19622743129730225,
"eval_runtime": 445.8046,
"eval_samples_per_second": 13.925,
"eval_steps_per_second": 0.87,
"eval_wer": 0.17106504619466215,
"step": 17000
},
{
"epoch": 19.27,
"learning_rate": 3.143312101910828e-06,
"loss": 0.9072,
"step": 17050
},
{
"epoch": 19.32,
"learning_rate": 2.9044585987261146e-06,
"loss": 0.8897,
"step": 17100
},
{
"epoch": 19.38,
"learning_rate": 2.6656050955414013e-06,
"loss": 0.8879,
"step": 17150
},
{
"epoch": 19.43,
"learning_rate": 2.426751592356688e-06,
"loss": 0.883,
"step": 17200
},
{
"epoch": 19.49,
"learning_rate": 2.1878980891719744e-06,
"loss": 0.8992,
"step": 17250
},
{
"epoch": 19.49,
"eval_loss": 0.19645148515701294,
"eval_runtime": 447.1526,
"eval_samples_per_second": 13.883,
"eval_steps_per_second": 0.868,
"eval_wer": 0.17034573598512803,
"step": 17250
},
{
"epoch": 19.55,
"learning_rate": 1.949044585987261e-06,
"loss": 0.8969,
"step": 17300
},
{
"epoch": 19.6,
"learning_rate": 1.7101910828025476e-06,
"loss": 0.872,
"step": 17350
},
{
"epoch": 19.66,
"learning_rate": 1.4713375796178341e-06,
"loss": 0.8984,
"step": 17400
},
{
"epoch": 19.72,
"learning_rate": 1.2324840764331209e-06,
"loss": 0.8913,
"step": 17450
},
{
"epoch": 19.77,
"learning_rate": 9.936305732484076e-07,
"loss": 0.8778,
"step": 17500
},
{
"epoch": 19.77,
"eval_loss": 0.19571013748645782,
"eval_runtime": 442.6975,
"eval_samples_per_second": 14.023,
"eval_steps_per_second": 0.876,
"eval_wer": 0.16990667650658123,
"step": 17500
},
{
"epoch": 19.83,
"learning_rate": 7.547770700636942e-07,
"loss": 0.8687,
"step": 17550
},
{
"epoch": 19.89,
"learning_rate": 5.159235668789809e-07,
"loss": 0.8858,
"step": 17600
},
{
"epoch": 19.94,
"learning_rate": 2.770700636942675e-07,
"loss": 0.8854,
"step": 17650
},
{
"epoch": 20.0,
"learning_rate": 3.821656050955413e-08,
"loss": 0.8898,
"step": 17700
},
{
"epoch": 20.0,
"step": 17700,
"total_flos": 2.3221664293970497e+20,
"train_loss": 1.2649466082470566,
"train_runtime": 115228.5602,
"train_samples_per_second": 9.837,
"train_steps_per_second": 0.154
}
],
"max_steps": 17700,
"num_train_epochs": 20,
"total_flos": 2.3221664293970497e+20,
"trial_name": null,
"trial_params": null
}