wav2vec2-xls-r-300m-german-de / trainer_state.json
AndrewMcDowell's picture
End of training
20de31b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.399048266485384,
"global_step": 40000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.7125e-06,
"loss": 17.1443,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 7.4625e-06,
"loss": 11.1984,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 1.1212499999999998e-05,
"loss": 8.1205,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.49625e-05,
"loss": 6.904,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 1.8712499999999997e-05,
"loss": 5.7531,
"step": 500
},
{
"epoch": 0.04,
"eval_loss": 5.456444263458252,
"eval_runtime": 820.476,
"eval_samples_per_second": 19.388,
"eval_steps_per_second": 2.424,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 2.2462499999999997e-05,
"loss": 4.5704,
"step": 600
},
{
"epoch": 0.06,
"learning_rate": 2.6212499999999997e-05,
"loss": 3.686,
"step": 700
},
{
"epoch": 0.07,
"learning_rate": 2.99625e-05,
"loss": 3.1914,
"step": 800
},
{
"epoch": 0.08,
"learning_rate": 3.37125e-05,
"loss": 3.0347,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 3.7462499999999996e-05,
"loss": 2.9882,
"step": 1000
},
{
"epoch": 0.08,
"eval_loss": 3.00412654876709,
"eval_runtime": 780.8978,
"eval_samples_per_second": 20.37,
"eval_steps_per_second": 2.547,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 4.12125e-05,
"loss": 2.96,
"step": 1100
},
{
"epoch": 0.1,
"learning_rate": 4.4962499999999995e-05,
"loss": 2.9349,
"step": 1200
},
{
"epoch": 0.11,
"learning_rate": 4.871249999999999e-05,
"loss": 2.9093,
"step": 1300
},
{
"epoch": 0.12,
"learning_rate": 5.2462499999999994e-05,
"loss": 2.7923,
"step": 1400
},
{
"epoch": 0.13,
"learning_rate": 5.62125e-05,
"loss": 2.1953,
"step": 1500
},
{
"epoch": 0.13,
"eval_loss": 1.1722890138626099,
"eval_runtime": 780.9363,
"eval_samples_per_second": 20.369,
"eval_steps_per_second": 2.547,
"eval_wer": 0.7120530803080308,
"step": 1500
},
{
"epoch": 0.14,
"learning_rate": 5.9962499999999994e-05,
"loss": 1.6768,
"step": 1600
},
{
"epoch": 0.14,
"learning_rate": 6.37125e-05,
"loss": 1.4414,
"step": 1700
},
{
"epoch": 0.15,
"learning_rate": 6.746249999999999e-05,
"loss": 1.3385,
"step": 1800
},
{
"epoch": 0.16,
"learning_rate": 7.121249999999999e-05,
"loss": 1.2697,
"step": 1900
},
{
"epoch": 0.17,
"learning_rate": 7.49625e-05,
"loss": 1.2406,
"step": 2000
},
{
"epoch": 0.17,
"eval_loss": 0.3655729591846466,
"eval_runtime": 784.9435,
"eval_samples_per_second": 20.265,
"eval_steps_per_second": 2.534,
"eval_wer": 0.3622593509350935,
"step": 2000
},
{
"epoch": 0.18,
"learning_rate": 7.498733799454298e-05,
"loss": 1.1985,
"step": 2100
},
{
"epoch": 0.19,
"learning_rate": 7.497454809004091e-05,
"loss": 1.1829,
"step": 2200
},
{
"epoch": 0.2,
"learning_rate": 7.496175818553888e-05,
"loss": 1.1421,
"step": 2300
},
{
"epoch": 0.2,
"learning_rate": 7.494896828103683e-05,
"loss": 1.1478,
"step": 2400
},
{
"epoch": 0.21,
"learning_rate": 7.493617837653478e-05,
"loss": 1.1294,
"step": 2500
},
{
"epoch": 0.21,
"eval_loss": 0.28432464599609375,
"eval_runtime": 777.4526,
"eval_samples_per_second": 20.46,
"eval_steps_per_second": 2.558,
"eval_wer": 0.29262926292629265,
"step": 2500
},
{
"epoch": 0.22,
"learning_rate": 7.492338847203274e-05,
"loss": 1.108,
"step": 2600
},
{
"epoch": 0.23,
"learning_rate": 7.491059856753069e-05,
"loss": 1.1031,
"step": 2700
},
{
"epoch": 0.24,
"learning_rate": 7.489780866302864e-05,
"loss": 1.0853,
"step": 2800
},
{
"epoch": 0.25,
"learning_rate": 7.488501875852659e-05,
"loss": 1.0775,
"step": 2900
},
{
"epoch": 0.25,
"learning_rate": 7.487222885402455e-05,
"loss": 1.0731,
"step": 3000
},
{
"epoch": 0.25,
"eval_loss": 0.2553892731666565,
"eval_runtime": 791.0753,
"eval_samples_per_second": 20.108,
"eval_steps_per_second": 2.514,
"eval_wer": 0.2663641364136414,
"step": 3000
},
{
"epoch": 0.26,
"learning_rate": 7.48594389495225e-05,
"loss": 1.0511,
"step": 3100
},
{
"epoch": 0.27,
"learning_rate": 7.484664904502046e-05,
"loss": 1.0618,
"step": 3200
},
{
"epoch": 0.28,
"learning_rate": 7.48338591405184e-05,
"loss": 1.0504,
"step": 3300
},
{
"epoch": 0.29,
"learning_rate": 7.482106923601637e-05,
"loss": 1.0445,
"step": 3400
},
{
"epoch": 0.3,
"learning_rate": 7.480827933151432e-05,
"loss": 1.051,
"step": 3500
},
{
"epoch": 0.3,
"eval_loss": 0.23866702616214752,
"eval_runtime": 778.9178,
"eval_samples_per_second": 20.422,
"eval_steps_per_second": 2.554,
"eval_wer": 0.25350660066006603,
"step": 3500
},
{
"epoch": 0.31,
"learning_rate": 7.479548942701227e-05,
"loss": 1.0441,
"step": 3600
},
{
"epoch": 0.31,
"learning_rate": 7.478269952251022e-05,
"loss": 1.0539,
"step": 3700
},
{
"epoch": 0.32,
"learning_rate": 7.476990961800818e-05,
"loss": 1.0458,
"step": 3800
},
{
"epoch": 0.33,
"learning_rate": 7.475711971350613e-05,
"loss": 1.0381,
"step": 3900
},
{
"epoch": 0.34,
"learning_rate": 7.474432980900408e-05,
"loss": 1.0479,
"step": 4000
},
{
"epoch": 0.34,
"eval_loss": 0.23452000319957733,
"eval_runtime": 783.957,
"eval_samples_per_second": 20.291,
"eval_steps_per_second": 2.537,
"eval_wer": 0.2511619911991199,
"step": 4000
},
{
"epoch": 0.35,
"learning_rate": 7.473153990450203e-05,
"loss": 1.0221,
"step": 4100
},
{
"epoch": 0.36,
"learning_rate": 7.471875e-05,
"loss": 1.0272,
"step": 4200
},
{
"epoch": 0.37,
"learning_rate": 7.470596009549795e-05,
"loss": 1.0043,
"step": 4300
},
{
"epoch": 0.37,
"learning_rate": 7.469329809004092e-05,
"loss": 1.0019,
"step": 4400
},
{
"epoch": 0.38,
"learning_rate": 7.468050818553888e-05,
"loss": 1.0026,
"step": 4500
},
{
"epoch": 0.38,
"eval_loss": 0.22704559564590454,
"eval_runtime": 787.7831,
"eval_samples_per_second": 20.192,
"eval_steps_per_second": 2.525,
"eval_wer": 0.245248899889989,
"step": 4500
},
{
"epoch": 0.39,
"learning_rate": 7.466771828103683e-05,
"loss": 0.994,
"step": 4600
},
{
"epoch": 0.4,
"learning_rate": 7.465492837653478e-05,
"loss": 1.0002,
"step": 4700
},
{
"epoch": 0.41,
"learning_rate": 7.464213847203273e-05,
"loss": 0.9863,
"step": 4800
},
{
"epoch": 0.42,
"learning_rate": 7.46293485675307e-05,
"loss": 0.9988,
"step": 4900
},
{
"epoch": 0.42,
"learning_rate": 7.461655866302864e-05,
"loss": 0.9921,
"step": 5000
},
{
"epoch": 0.42,
"eval_loss": 0.22116777300834656,
"eval_runtime": 786.501,
"eval_samples_per_second": 20.225,
"eval_steps_per_second": 2.529,
"eval_wer": 0.23532728272827283,
"step": 5000
},
{
"epoch": 0.43,
"learning_rate": 7.46037687585266e-05,
"loss": 0.9872,
"step": 5100
},
{
"epoch": 0.44,
"learning_rate": 7.459110675306958e-05,
"loss": 0.997,
"step": 5200
},
{
"epoch": 0.45,
"learning_rate": 7.457831684856753e-05,
"loss": 0.9853,
"step": 5300
},
{
"epoch": 0.46,
"learning_rate": 7.456552694406548e-05,
"loss": 0.9891,
"step": 5400
},
{
"epoch": 0.47,
"learning_rate": 7.455273703956343e-05,
"loss": 0.9839,
"step": 5500
},
{
"epoch": 0.47,
"eval_loss": 0.21411563456058502,
"eval_runtime": 788.0763,
"eval_samples_per_second": 20.185,
"eval_steps_per_second": 2.524,
"eval_wer": 0.23299642464246426,
"step": 5500
},
{
"epoch": 0.48,
"learning_rate": 7.453994713506138e-05,
"loss": 0.9851,
"step": 5600
},
{
"epoch": 0.48,
"learning_rate": 7.452715723055934e-05,
"loss": 0.9925,
"step": 5700
},
{
"epoch": 0.49,
"learning_rate": 7.451436732605729e-05,
"loss": 0.9894,
"step": 5800
},
{
"epoch": 0.5,
"learning_rate": 7.450157742155524e-05,
"loss": 1.0026,
"step": 5900
},
{
"epoch": 0.51,
"learning_rate": 7.448878751705319e-05,
"loss": 0.9907,
"step": 6000
},
{
"epoch": 0.51,
"eval_loss": 0.21215561032295227,
"eval_runtime": 782.9841,
"eval_samples_per_second": 20.316,
"eval_steps_per_second": 2.54,
"eval_wer": 0.23341584158415843,
"step": 6000
},
{
"epoch": 0.52,
"learning_rate": 7.447599761255116e-05,
"loss": 0.9939,
"step": 6100
},
{
"epoch": 0.53,
"learning_rate": 7.44632077080491e-05,
"loss": 0.9844,
"step": 6200
},
{
"epoch": 0.54,
"learning_rate": 7.445041780354707e-05,
"loss": 0.9783,
"step": 6300
},
{
"epoch": 0.54,
"learning_rate": 7.4437627899045e-05,
"loss": 0.9762,
"step": 6400
},
{
"epoch": 0.55,
"learning_rate": 7.442483799454297e-05,
"loss": 0.9788,
"step": 6500
},
{
"epoch": 0.55,
"eval_loss": 0.2113770842552185,
"eval_runtime": 788.5303,
"eval_samples_per_second": 20.173,
"eval_steps_per_second": 2.522,
"eval_wer": 0.2269595709570957,
"step": 6500
},
{
"epoch": 0.56,
"learning_rate": 7.441204809004092e-05,
"loss": 0.9892,
"step": 6600
},
{
"epoch": 0.57,
"learning_rate": 7.439925818553887e-05,
"loss": 0.9902,
"step": 6700
},
{
"epoch": 0.58,
"learning_rate": 7.438646828103683e-05,
"loss": 0.9724,
"step": 6800
},
{
"epoch": 0.59,
"learning_rate": 7.437367837653478e-05,
"loss": 0.9789,
"step": 6900
},
{
"epoch": 0.59,
"learning_rate": 7.436088847203273e-05,
"loss": 0.9687,
"step": 7000
},
{
"epoch": 0.59,
"eval_loss": 0.20659922063350677,
"eval_runtime": 780.1295,
"eval_samples_per_second": 20.39,
"eval_steps_per_second": 2.55,
"eval_wer": 0.23226760176017602,
"step": 7000
},
{
"epoch": 0.6,
"learning_rate": 7.434809856753068e-05,
"loss": 0.9756,
"step": 7100
},
{
"epoch": 0.61,
"learning_rate": 7.433530866302865e-05,
"loss": 0.9724,
"step": 7200
},
{
"epoch": 0.62,
"learning_rate": 7.43225187585266e-05,
"loss": 0.981,
"step": 7300
},
{
"epoch": 0.63,
"learning_rate": 7.430972885402455e-05,
"loss": 0.9703,
"step": 7400
},
{
"epoch": 0.64,
"learning_rate": 7.42969389495225e-05,
"loss": 0.9777,
"step": 7500
},
{
"epoch": 0.64,
"eval_loss": 0.20326362550258636,
"eval_runtime": 776.6863,
"eval_samples_per_second": 20.481,
"eval_steps_per_second": 2.561,
"eval_wer": 0.22365924092409242,
"step": 7500
},
{
"epoch": 0.65,
"learning_rate": 7.428427694406548e-05,
"loss": 0.9631,
"step": 7600
},
{
"epoch": 0.65,
"learning_rate": 7.427148703956343e-05,
"loss": 0.979,
"step": 7700
},
{
"epoch": 0.66,
"learning_rate": 7.425869713506138e-05,
"loss": 0.9708,
"step": 7800
},
{
"epoch": 0.67,
"learning_rate": 7.424590723055933e-05,
"loss": 0.979,
"step": 7900
},
{
"epoch": 0.68,
"learning_rate": 7.42331173260573e-05,
"loss": 0.9476,
"step": 8000
},
{
"epoch": 0.68,
"eval_loss": 0.2020493745803833,
"eval_runtime": 772.9733,
"eval_samples_per_second": 20.579,
"eval_steps_per_second": 2.573,
"eval_wer": 0.21941694169416942,
"step": 8000
},
{
"epoch": 0.69,
"learning_rate": 7.422032742155525e-05,
"loss": 0.9659,
"step": 8100
},
{
"epoch": 0.7,
"learning_rate": 7.420753751705321e-05,
"loss": 0.9559,
"step": 8200
},
{
"epoch": 0.71,
"learning_rate": 7.419474761255115e-05,
"loss": 0.9808,
"step": 8300
},
{
"epoch": 0.71,
"learning_rate": 7.418195770804911e-05,
"loss": 0.9678,
"step": 8400
},
{
"epoch": 0.72,
"learning_rate": 7.416916780354706e-05,
"loss": 0.9625,
"step": 8500
},
{
"epoch": 0.72,
"eval_loss": 0.19774506986141205,
"eval_runtime": 776.7494,
"eval_samples_per_second": 20.479,
"eval_steps_per_second": 2.561,
"eval_wer": 0.2191144114411441,
"step": 8500
},
{
"epoch": 0.73,
"learning_rate": 7.415637789904502e-05,
"loss": 0.966,
"step": 8600
},
{
"epoch": 0.74,
"learning_rate": 7.414358799454297e-05,
"loss": 0.9713,
"step": 8700
},
{
"epoch": 0.75,
"learning_rate": 7.413079809004092e-05,
"loss": 0.9686,
"step": 8800
},
{
"epoch": 0.76,
"learning_rate": 7.411800818553887e-05,
"loss": 0.9609,
"step": 8900
},
{
"epoch": 0.76,
"learning_rate": 7.410521828103682e-05,
"loss": 0.9497,
"step": 9000
},
{
"epoch": 0.76,
"eval_loss": 0.19759373366832733,
"eval_runtime": 778.1447,
"eval_samples_per_second": 20.442,
"eval_steps_per_second": 2.556,
"eval_wer": 0.21747112211221123,
"step": 9000
},
{
"epoch": 0.77,
"learning_rate": 7.409242837653479e-05,
"loss": 0.9637,
"step": 9100
},
{
"epoch": 0.78,
"learning_rate": 7.407963847203274e-05,
"loss": 0.9609,
"step": 9200
},
{
"epoch": 0.79,
"learning_rate": 7.406684856753069e-05,
"loss": 0.9628,
"step": 9300
},
{
"epoch": 0.8,
"learning_rate": 7.405405866302864e-05,
"loss": 0.9508,
"step": 9400
},
{
"epoch": 0.81,
"learning_rate": 7.40412687585266e-05,
"loss": 0.9781,
"step": 9500
},
{
"epoch": 0.81,
"eval_loss": 0.19555576145648956,
"eval_runtime": 780.7341,
"eval_samples_per_second": 20.374,
"eval_steps_per_second": 2.548,
"eval_wer": 0.21591721672167216,
"step": 9500
},
{
"epoch": 0.82,
"learning_rate": 7.402847885402455e-05,
"loss": 0.9567,
"step": 9600
},
{
"epoch": 0.82,
"learning_rate": 7.401581684856752e-05,
"loss": 0.9514,
"step": 9700
},
{
"epoch": 0.83,
"learning_rate": 7.400302694406547e-05,
"loss": 0.9612,
"step": 9800
},
{
"epoch": 0.84,
"learning_rate": 7.399023703956343e-05,
"loss": 0.9702,
"step": 9900
},
{
"epoch": 0.85,
"learning_rate": 7.397744713506138e-05,
"loss": 0.9552,
"step": 10000
},
{
"epoch": 0.85,
"eval_loss": 0.19579192996025085,
"eval_runtime": 785.2228,
"eval_samples_per_second": 20.258,
"eval_steps_per_second": 2.533,
"eval_wer": 0.2191144114411441,
"step": 10000
},
{
"epoch": 0.86,
"learning_rate": 7.396465723055934e-05,
"loss": 0.969,
"step": 10100
},
{
"epoch": 0.87,
"learning_rate": 7.395186732605729e-05,
"loss": 0.9607,
"step": 10200
},
{
"epoch": 0.88,
"learning_rate": 7.393907742155525e-05,
"loss": 0.9563,
"step": 10300
},
{
"epoch": 0.88,
"learning_rate": 7.39262875170532e-05,
"loss": 0.9601,
"step": 10400
},
{
"epoch": 0.89,
"learning_rate": 7.391349761255116e-05,
"loss": 0.9345,
"step": 10500
},
{
"epoch": 0.89,
"eval_loss": 0.196384459733963,
"eval_runtime": 782.7934,
"eval_samples_per_second": 20.321,
"eval_steps_per_second": 2.541,
"eval_wer": 0.21577970297029703,
"step": 10500
},
{
"epoch": 0.9,
"learning_rate": 7.39007077080491e-05,
"loss": 0.9636,
"step": 10600
},
{
"epoch": 0.91,
"learning_rate": 7.388791780354706e-05,
"loss": 0.9453,
"step": 10700
},
{
"epoch": 0.92,
"learning_rate": 7.387512789904501e-05,
"loss": 0.9475,
"step": 10800
},
{
"epoch": 0.93,
"learning_rate": 7.386233799454296e-05,
"loss": 0.9527,
"step": 10900
},
{
"epoch": 0.93,
"learning_rate": 7.384954809004093e-05,
"loss": 0.9528,
"step": 11000
},
{
"epoch": 0.93,
"eval_loss": 0.1925509124994278,
"eval_runtime": 787.0057,
"eval_samples_per_second": 20.212,
"eval_steps_per_second": 2.527,
"eval_wer": 0.21540154015401541,
"step": 11000
},
{
"epoch": 0.94,
"learning_rate": 7.383675818553888e-05,
"loss": 0.9517,
"step": 11100
},
{
"epoch": 0.95,
"learning_rate": 7.382396828103683e-05,
"loss": 0.9489,
"step": 11200
},
{
"epoch": 0.96,
"learning_rate": 7.381117837653478e-05,
"loss": 0.9652,
"step": 11300
},
{
"epoch": 0.97,
"learning_rate": 7.379838847203274e-05,
"loss": 0.9348,
"step": 11400
},
{
"epoch": 0.98,
"learning_rate": 7.378559856753069e-05,
"loss": 0.9502,
"step": 11500
},
{
"epoch": 0.98,
"eval_loss": 0.19526860117912292,
"eval_runtime": 785.5748,
"eval_samples_per_second": 20.249,
"eval_steps_per_second": 2.532,
"eval_wer": 0.21489961496149615,
"step": 11500
},
{
"epoch": 0.99,
"learning_rate": 7.377280866302864e-05,
"loss": 0.945,
"step": 11600
},
{
"epoch": 0.99,
"learning_rate": 7.376001875852659e-05,
"loss": 0.941,
"step": 11700
},
{
"epoch": 1.0,
"learning_rate": 7.374722885402455e-05,
"loss": 0.9155,
"step": 11800
},
{
"epoch": 1.01,
"learning_rate": 7.37344389495225e-05,
"loss": 0.936,
"step": 11900
},
{
"epoch": 1.02,
"learning_rate": 7.372177694406547e-05,
"loss": 0.9358,
"step": 12000
},
{
"epoch": 1.02,
"eval_loss": 0.19272169470787048,
"eval_runtime": 794.4472,
"eval_samples_per_second": 20.023,
"eval_steps_per_second": 2.504,
"eval_wer": 0.2167079207920792,
"step": 12000
},
{
"epoch": 1.03,
"learning_rate": 7.370898703956342e-05,
"loss": 0.9302,
"step": 12100
},
{
"epoch": 1.04,
"learning_rate": 7.369619713506139e-05,
"loss": 0.9437,
"step": 12200
},
{
"epoch": 1.05,
"learning_rate": 7.368340723055934e-05,
"loss": 0.9537,
"step": 12300
},
{
"epoch": 1.05,
"learning_rate": 7.36706173260573e-05,
"loss": 0.9364,
"step": 12400
},
{
"epoch": 1.06,
"learning_rate": 7.365782742155524e-05,
"loss": 0.941,
"step": 12500
},
{
"epoch": 1.06,
"eval_loss": 0.19010405242443085,
"eval_runtime": 786.4035,
"eval_samples_per_second": 20.228,
"eval_steps_per_second": 2.529,
"eval_wer": 0.21145489548954896,
"step": 12500
},
{
"epoch": 1.07,
"learning_rate": 7.36450375170532e-05,
"loss": 0.9385,
"step": 12600
},
{
"epoch": 1.08,
"learning_rate": 7.363224761255115e-05,
"loss": 0.9386,
"step": 12700
},
{
"epoch": 1.09,
"learning_rate": 7.361945770804912e-05,
"loss": 0.956,
"step": 12800
},
{
"epoch": 1.1,
"learning_rate": 7.360666780354705e-05,
"loss": 0.9407,
"step": 12900
},
{
"epoch": 1.1,
"learning_rate": 7.359387789904502e-05,
"loss": 0.9287,
"step": 13000
},
{
"epoch": 1.1,
"eval_loss": 0.1935959756374359,
"eval_runtime": 778.5995,
"eval_samples_per_second": 20.43,
"eval_steps_per_second": 2.555,
"eval_wer": 0.20895214521452146,
"step": 13000
},
{
"epoch": 1.11,
"learning_rate": 7.358108799454297e-05,
"loss": 0.9617,
"step": 13100
},
{
"epoch": 1.12,
"learning_rate": 7.356829809004092e-05,
"loss": 0.9321,
"step": 13200
},
{
"epoch": 1.13,
"learning_rate": 7.355550818553888e-05,
"loss": 0.9613,
"step": 13300
},
{
"epoch": 1.14,
"learning_rate": 7.354271828103683e-05,
"loss": 0.9436,
"step": 13400
},
{
"epoch": 1.15,
"learning_rate": 7.352992837653478e-05,
"loss": 0.9491,
"step": 13500
},
{
"epoch": 1.15,
"eval_loss": 0.1899876892566681,
"eval_runtime": 786.4087,
"eval_samples_per_second": 20.227,
"eval_steps_per_second": 2.529,
"eval_wer": 0.2104097909790979,
"step": 13500
},
{
"epoch": 1.16,
"learning_rate": 7.351713847203273e-05,
"loss": 0.9486,
"step": 13600
},
{
"epoch": 1.16,
"learning_rate": 7.35043485675307e-05,
"loss": 0.9348,
"step": 13700
},
{
"epoch": 1.17,
"learning_rate": 7.349155866302864e-05,
"loss": 0.9542,
"step": 13800
},
{
"epoch": 1.18,
"learning_rate": 7.34787687585266e-05,
"loss": 0.9483,
"step": 13900
},
{
"epoch": 1.19,
"learning_rate": 7.346597885402454e-05,
"loss": 0.9478,
"step": 14000
},
{
"epoch": 1.19,
"eval_loss": 0.19308479130268097,
"eval_runtime": 780.4777,
"eval_samples_per_second": 20.381,
"eval_steps_per_second": 2.548,
"eval_wer": 0.21197744774477448,
"step": 14000
},
{
"epoch": 1.2,
"learning_rate": 7.345331684856753e-05,
"loss": 0.9411,
"step": 14100
},
{
"epoch": 1.21,
"learning_rate": 7.34406548431105e-05,
"loss": 0.9335,
"step": 14200
},
{
"epoch": 1.22,
"learning_rate": 7.342786493860845e-05,
"loss": 0.9372,
"step": 14300
},
{
"epoch": 1.22,
"learning_rate": 7.341507503410641e-05,
"loss": 0.9339,
"step": 14400
},
{
"epoch": 1.23,
"learning_rate": 7.340228512960436e-05,
"loss": 0.946,
"step": 14500
},
{
"epoch": 1.23,
"eval_loss": 0.1913636177778244,
"eval_runtime": 795.396,
"eval_samples_per_second": 19.999,
"eval_steps_per_second": 2.501,
"eval_wer": 0.21338696369636964,
"step": 14500
},
{
"epoch": 1.24,
"learning_rate": 7.338949522510231e-05,
"loss": 0.9447,
"step": 14600
},
{
"epoch": 1.25,
"learning_rate": 7.337670532060026e-05,
"loss": 0.929,
"step": 14700
},
{
"epoch": 1.26,
"learning_rate": 7.336391541609821e-05,
"loss": 0.9413,
"step": 14800
},
{
"epoch": 1.27,
"learning_rate": 7.335112551159618e-05,
"loss": 0.9292,
"step": 14900
},
{
"epoch": 1.27,
"learning_rate": 7.333833560709413e-05,
"loss": 0.9499,
"step": 15000
},
{
"epoch": 1.27,
"eval_loss": 0.1930927187204361,
"eval_runtime": 803.3902,
"eval_samples_per_second": 19.8,
"eval_steps_per_second": 2.476,
"eval_wer": 0.21727172717271728,
"step": 15000
},
{
"epoch": 1.28,
"learning_rate": 7.332554570259209e-05,
"loss": 0.9406,
"step": 15100
},
{
"epoch": 1.29,
"learning_rate": 7.331275579809003e-05,
"loss": 0.9341,
"step": 15200
},
{
"epoch": 1.3,
"learning_rate": 7.329996589358799e-05,
"loss": 0.935,
"step": 15300
},
{
"epoch": 1.31,
"learning_rate": 7.328717598908594e-05,
"loss": 0.9426,
"step": 15400
},
{
"epoch": 1.32,
"learning_rate": 7.32743860845839e-05,
"loss": 0.9346,
"step": 15500
},
{
"epoch": 1.32,
"eval_loss": 0.19133125245571136,
"eval_runtime": 785.6947,
"eval_samples_per_second": 20.246,
"eval_steps_per_second": 2.532,
"eval_wer": 0.2104716721672167,
"step": 15500
},
{
"epoch": 1.33,
"learning_rate": 7.326159618008185e-05,
"loss": 0.9499,
"step": 15600
},
{
"epoch": 1.33,
"learning_rate": 7.32488062755798e-05,
"loss": 0.9295,
"step": 15700
},
{
"epoch": 1.34,
"learning_rate": 7.323601637107775e-05,
"loss": 0.9459,
"step": 15800
},
{
"epoch": 1.35,
"learning_rate": 7.32232264665757e-05,
"loss": 0.9271,
"step": 15900
},
{
"epoch": 1.36,
"learning_rate": 7.321043656207367e-05,
"loss": 0.9509,
"step": 16000
},
{
"epoch": 1.36,
"eval_loss": 0.19015146791934967,
"eval_runtime": 782.4331,
"eval_samples_per_second": 20.33,
"eval_steps_per_second": 2.542,
"eval_wer": 0.21373762376237623,
"step": 16000
},
{
"epoch": 1.37,
"learning_rate": 7.319764665757162e-05,
"loss": 0.938,
"step": 16100
},
{
"epoch": 1.38,
"learning_rate": 7.318485675306957e-05,
"loss": 0.9378,
"step": 16200
},
{
"epoch": 1.39,
"learning_rate": 7.317206684856752e-05,
"loss": 0.9266,
"step": 16300
},
{
"epoch": 1.39,
"learning_rate": 7.315927694406548e-05,
"loss": 0.9325,
"step": 16400
},
{
"epoch": 1.4,
"learning_rate": 7.314648703956343e-05,
"loss": 0.9294,
"step": 16500
},
{
"epoch": 1.4,
"eval_loss": 0.18954087793827057,
"eval_runtime": 786.5281,
"eval_samples_per_second": 20.224,
"eval_steps_per_second": 2.529,
"eval_wer": 0.20861523652365235,
"step": 16500
},
{
"epoch": 1.41,
"learning_rate": 7.31336971350614e-05,
"loss": 0.9421,
"step": 16600
},
{
"epoch": 1.42,
"learning_rate": 7.312103512960435e-05,
"loss": 0.9453,
"step": 16700
},
{
"epoch": 1.43,
"learning_rate": 7.310824522510231e-05,
"loss": 0.9295,
"step": 16800
},
{
"epoch": 1.44,
"learning_rate": 7.309545532060026e-05,
"loss": 0.9347,
"step": 16900
},
{
"epoch": 1.44,
"learning_rate": 7.308266541609823e-05,
"loss": 0.9418,
"step": 17000
},
{
"epoch": 1.44,
"eval_loss": 0.19125838577747345,
"eval_runtime": 791.8538,
"eval_samples_per_second": 20.088,
"eval_steps_per_second": 2.512,
"eval_wer": 0.2182549504950495,
"step": 17000
},
{
"epoch": 1.45,
"learning_rate": 7.306987551159617e-05,
"loss": 0.9383,
"step": 17100
},
{
"epoch": 1.46,
"learning_rate": 7.305708560709413e-05,
"loss": 0.923,
"step": 17200
},
{
"epoch": 1.47,
"learning_rate": 7.304429570259208e-05,
"loss": 0.9407,
"step": 17300
},
{
"epoch": 1.48,
"learning_rate": 7.303150579809004e-05,
"loss": 0.931,
"step": 17400
},
{
"epoch": 1.49,
"learning_rate": 7.301871589358798e-05,
"loss": 0.9302,
"step": 17500
},
{
"epoch": 1.49,
"eval_loss": 0.18843084573745728,
"eval_runtime": 784.6176,
"eval_samples_per_second": 20.274,
"eval_steps_per_second": 2.535,
"eval_wer": 0.21144114411441145,
"step": 17500
},
{
"epoch": 1.5,
"learning_rate": 7.300592598908594e-05,
"loss": 0.9399,
"step": 17600
},
{
"epoch": 1.5,
"learning_rate": 7.299313608458389e-05,
"loss": 0.947,
"step": 17700
},
{
"epoch": 1.51,
"learning_rate": 7.298034618008186e-05,
"loss": 0.9468,
"step": 17800
},
{
"epoch": 1.52,
"learning_rate": 7.29675562755798e-05,
"loss": 0.9374,
"step": 17900
},
{
"epoch": 1.53,
"learning_rate": 7.295476637107776e-05,
"loss": 0.9418,
"step": 18000
},
{
"epoch": 1.53,
"eval_loss": 0.1894412785768509,
"eval_runtime": 794.6127,
"eval_samples_per_second": 20.019,
"eval_steps_per_second": 2.503,
"eval_wer": 0.21082920792079207,
"step": 18000
},
{
"epoch": 1.54,
"learning_rate": 7.294197646657571e-05,
"loss": 0.9212,
"step": 18100
},
{
"epoch": 1.55,
"learning_rate": 7.292918656207366e-05,
"loss": 0.9321,
"step": 18200
},
{
"epoch": 1.56,
"learning_rate": 7.291639665757162e-05,
"loss": 0.9321,
"step": 18300
},
{
"epoch": 1.56,
"learning_rate": 7.290360675306957e-05,
"loss": 0.9203,
"step": 18400
},
{
"epoch": 1.57,
"learning_rate": 7.289081684856752e-05,
"loss": 0.9363,
"step": 18500
},
{
"epoch": 1.57,
"eval_loss": 0.18861933052539825,
"eval_runtime": 792.4762,
"eval_samples_per_second": 20.073,
"eval_steps_per_second": 2.51,
"eval_wer": 0.21319444444444444,
"step": 18500
},
{
"epoch": 1.58,
"learning_rate": 7.287802694406547e-05,
"loss": 0.9392,
"step": 18600
},
{
"epoch": 1.59,
"learning_rate": 7.286523703956343e-05,
"loss": 0.9211,
"step": 18700
},
{
"epoch": 1.6,
"learning_rate": 7.28525750341064e-05,
"loss": 0.9245,
"step": 18800
},
{
"epoch": 1.61,
"learning_rate": 7.283978512960435e-05,
"loss": 0.9421,
"step": 18900
},
{
"epoch": 1.61,
"learning_rate": 7.28269952251023e-05,
"loss": 0.9338,
"step": 19000
},
{
"epoch": 1.61,
"eval_loss": 0.18555332720279694,
"eval_runtime": 790.487,
"eval_samples_per_second": 20.123,
"eval_steps_per_second": 2.516,
"eval_wer": 0.20778327832783278,
"step": 19000
},
{
"epoch": 1.62,
"learning_rate": 7.281420532060027e-05,
"loss": 0.9221,
"step": 19100
},
{
"epoch": 1.63,
"learning_rate": 7.280141541609822e-05,
"loss": 0.9176,
"step": 19200
},
{
"epoch": 1.64,
"learning_rate": 7.278862551159618e-05,
"loss": 0.914,
"step": 19300
},
{
"epoch": 1.65,
"learning_rate": 7.277583560709412e-05,
"loss": 0.9248,
"step": 19400
},
{
"epoch": 1.66,
"learning_rate": 7.276304570259208e-05,
"loss": 0.9185,
"step": 19500
},
{
"epoch": 1.66,
"eval_loss": 0.18515856564044952,
"eval_runtime": 792.1602,
"eval_samples_per_second": 20.081,
"eval_steps_per_second": 2.511,
"eval_wer": 0.20556243124312432,
"step": 19500
},
{
"epoch": 1.67,
"learning_rate": 7.275025579809003e-05,
"loss": 0.9147,
"step": 19600
},
{
"epoch": 1.67,
"learning_rate": 7.2737465893588e-05,
"loss": 0.924,
"step": 19700
},
{
"epoch": 1.68,
"learning_rate": 7.272467598908595e-05,
"loss": 0.9247,
"step": 19800
},
{
"epoch": 1.69,
"learning_rate": 7.27118860845839e-05,
"loss": 0.9331,
"step": 19900
},
{
"epoch": 1.7,
"learning_rate": 7.269909618008185e-05,
"loss": 0.9216,
"step": 20000
},
{
"epoch": 1.7,
"eval_loss": 0.18740816414356232,
"eval_runtime": 783.109,
"eval_samples_per_second": 20.313,
"eval_steps_per_second": 2.54,
"eval_wer": 0.2095365786578658,
"step": 20000
},
{
"epoch": 1.71,
"learning_rate": 7.26863062755798e-05,
"loss": 0.93,
"step": 20100
},
{
"epoch": 1.72,
"learning_rate": 7.267351637107776e-05,
"loss": 0.9116,
"step": 20200
},
{
"epoch": 1.73,
"learning_rate": 7.266072646657571e-05,
"loss": 0.9348,
"step": 20300
},
{
"epoch": 1.73,
"learning_rate": 7.264793656207366e-05,
"loss": 0.9223,
"step": 20400
},
{
"epoch": 1.74,
"learning_rate": 7.263514665757161e-05,
"loss": 0.9176,
"step": 20500
},
{
"epoch": 1.74,
"eval_loss": 0.18728592991828918,
"eval_runtime": 790.4913,
"eval_samples_per_second": 20.123,
"eval_steps_per_second": 2.516,
"eval_wer": 0.2078107810781078,
"step": 20500
},
{
"epoch": 1.75,
"learning_rate": 7.262235675306957e-05,
"loss": 0.9346,
"step": 20600
},
{
"epoch": 1.76,
"learning_rate": 7.260956684856752e-05,
"loss": 0.92,
"step": 20700
},
{
"epoch": 1.77,
"learning_rate": 7.259677694406547e-05,
"loss": 0.929,
"step": 20800
},
{
"epoch": 1.78,
"learning_rate": 7.258398703956342e-05,
"loss": 0.9235,
"step": 20900
},
{
"epoch": 1.78,
"learning_rate": 7.257119713506139e-05,
"loss": 0.9288,
"step": 21000
},
{
"epoch": 1.78,
"eval_loss": 0.1864841878414154,
"eval_runtime": 787.9985,
"eval_samples_per_second": 20.187,
"eval_steps_per_second": 2.524,
"eval_wer": 0.2097428492849285,
"step": 21000
},
{
"epoch": 1.79,
"learning_rate": 7.255840723055934e-05,
"loss": 0.9205,
"step": 21100
},
{
"epoch": 1.8,
"learning_rate": 7.25456173260573e-05,
"loss": 0.9154,
"step": 21200
},
{
"epoch": 1.81,
"learning_rate": 7.253282742155524e-05,
"loss": 0.924,
"step": 21300
},
{
"epoch": 1.82,
"learning_rate": 7.25200375170532e-05,
"loss": 0.9245,
"step": 21400
},
{
"epoch": 1.83,
"learning_rate": 7.250724761255115e-05,
"loss": 0.9278,
"step": 21500
},
{
"epoch": 1.83,
"eval_loss": 0.18693457543849945,
"eval_runtime": 790.2361,
"eval_samples_per_second": 20.129,
"eval_steps_per_second": 2.517,
"eval_wer": 0.20997662266226622,
"step": 21500
},
{
"epoch": 1.84,
"learning_rate": 7.24944577080491e-05,
"loss": 0.9339,
"step": 21600
},
{
"epoch": 1.84,
"learning_rate": 7.248166780354707e-05,
"loss": 0.9207,
"step": 21700
},
{
"epoch": 1.85,
"learning_rate": 7.246887789904502e-05,
"loss": 0.9267,
"step": 21800
},
{
"epoch": 1.86,
"learning_rate": 7.245608799454297e-05,
"loss": 0.9259,
"step": 21900
},
{
"epoch": 1.87,
"learning_rate": 7.244329809004092e-05,
"loss": 0.9295,
"step": 22000
},
{
"epoch": 1.87,
"eval_loss": 0.18780598044395447,
"eval_runtime": 781.1723,
"eval_samples_per_second": 20.363,
"eval_steps_per_second": 2.546,
"eval_wer": 0.2095365786578658,
"step": 22000
},
{
"epoch": 1.88,
"learning_rate": 7.243050818553888e-05,
"loss": 0.9345,
"step": 22100
},
{
"epoch": 1.89,
"learning_rate": 7.241771828103683e-05,
"loss": 0.9321,
"step": 22200
},
{
"epoch": 1.89,
"learning_rate": 7.240492837653478e-05,
"loss": 0.9238,
"step": 22300
},
{
"epoch": 1.9,
"learning_rate": 7.239213847203273e-05,
"loss": 0.9232,
"step": 22400
},
{
"epoch": 1.91,
"learning_rate": 7.237947646657571e-05,
"loss": 0.9221,
"step": 22500
},
{
"epoch": 1.91,
"eval_loss": 0.18524181842803955,
"eval_runtime": 788.3977,
"eval_samples_per_second": 20.176,
"eval_steps_per_second": 2.523,
"eval_wer": 0.21208058305830582,
"step": 22500
},
{
"epoch": 1.92,
"learning_rate": 7.236668656207366e-05,
"loss": 0.9282,
"step": 22600
},
{
"epoch": 1.93,
"learning_rate": 7.235389665757161e-05,
"loss": 0.9235,
"step": 22700
},
{
"epoch": 1.94,
"learning_rate": 7.234110675306956e-05,
"loss": 0.9362,
"step": 22800
},
{
"epoch": 1.95,
"learning_rate": 7.232831684856753e-05,
"loss": 0.9312,
"step": 22900
},
{
"epoch": 1.95,
"learning_rate": 7.231552694406548e-05,
"loss": 0.924,
"step": 23000
},
{
"epoch": 1.95,
"eval_loss": 0.18553005158901215,
"eval_runtime": 795.127,
"eval_samples_per_second": 20.006,
"eval_steps_per_second": 2.501,
"eval_wer": 0.20418729372937294,
"step": 23000
},
{
"epoch": 1.96,
"learning_rate": 7.230273703956344e-05,
"loss": 0.923,
"step": 23100
},
{
"epoch": 1.97,
"learning_rate": 7.228994713506138e-05,
"loss": 0.9308,
"step": 23200
},
{
"epoch": 1.98,
"learning_rate": 7.227715723055934e-05,
"loss": 0.9188,
"step": 23300
},
{
"epoch": 1.99,
"learning_rate": 7.226436732605729e-05,
"loss": 0.9302,
"step": 23400
},
{
"epoch": 2.0,
"learning_rate": 7.225157742155524e-05,
"loss": 0.9104,
"step": 23500
},
{
"epoch": 2.0,
"eval_loss": 0.18578162789344788,
"eval_runtime": 782.1788,
"eval_samples_per_second": 20.337,
"eval_steps_per_second": 2.543,
"eval_wer": 0.21046479647964797,
"step": 23500
},
{
"epoch": 2.01,
"learning_rate": 7.22387875170532e-05,
"loss": 0.9344,
"step": 23600
},
{
"epoch": 2.01,
"learning_rate": 7.222599761255116e-05,
"loss": 0.9251,
"step": 23700
},
{
"epoch": 2.02,
"learning_rate": 7.22132077080491e-05,
"loss": 0.9193,
"step": 23800
},
{
"epoch": 2.03,
"learning_rate": 7.220041780354706e-05,
"loss": 0.9253,
"step": 23900
},
{
"epoch": 2.04,
"learning_rate": 7.218762789904502e-05,
"loss": 0.9284,
"step": 24000
},
{
"epoch": 2.04,
"eval_loss": 0.18499909341335297,
"eval_runtime": 795.2327,
"eval_samples_per_second": 20.003,
"eval_steps_per_second": 2.501,
"eval_wer": 0.20799642464246423,
"step": 24000
},
{
"epoch": 2.05,
"learning_rate": 7.217483799454297e-05,
"loss": 0.9262,
"step": 24100
},
{
"epoch": 2.06,
"learning_rate": 7.216204809004092e-05,
"loss": 0.9169,
"step": 24200
},
{
"epoch": 2.06,
"learning_rate": 7.214925818553887e-05,
"loss": 0.9288,
"step": 24300
},
{
"epoch": 2.07,
"learning_rate": 7.213646828103683e-05,
"loss": 0.9054,
"step": 24400
},
{
"epoch": 2.08,
"learning_rate": 7.212367837653478e-05,
"loss": 0.9162,
"step": 24500
},
{
"epoch": 2.08,
"eval_loss": 0.1839156299829483,
"eval_runtime": 786.0752,
"eval_samples_per_second": 20.236,
"eval_steps_per_second": 2.53,
"eval_wer": 0.20453795379537953,
"step": 24500
},
{
"epoch": 2.09,
"learning_rate": 7.211101637107775e-05,
"loss": 0.925,
"step": 24600
},
{
"epoch": 2.1,
"learning_rate": 7.20982264665757e-05,
"loss": 0.9107,
"step": 24700
},
{
"epoch": 2.11,
"learning_rate": 7.208543656207367e-05,
"loss": 0.9044,
"step": 24800
},
{
"epoch": 2.12,
"learning_rate": 7.207264665757162e-05,
"loss": 0.9215,
"step": 24900
},
{
"epoch": 2.12,
"learning_rate": 7.205985675306957e-05,
"loss": 0.9111,
"step": 25000
},
{
"epoch": 2.12,
"eval_loss": 0.1838468611240387,
"eval_runtime": 782.8069,
"eval_samples_per_second": 20.32,
"eval_steps_per_second": 2.541,
"eval_wer": 0.2079689218921892,
"step": 25000
},
{
"epoch": 2.13,
"learning_rate": 7.204706684856752e-05,
"loss": 0.9254,
"step": 25100
},
{
"epoch": 2.14,
"learning_rate": 7.203427694406548e-05,
"loss": 0.9195,
"step": 25200
},
{
"epoch": 2.15,
"learning_rate": 7.202148703956343e-05,
"loss": 0.9224,
"step": 25300
},
{
"epoch": 2.16,
"learning_rate": 7.20086971350614e-05,
"loss": 0.9321,
"step": 25400
},
{
"epoch": 2.17,
"learning_rate": 7.199590723055933e-05,
"loss": 0.91,
"step": 25500
},
{
"epoch": 2.17,
"eval_loss": 0.1888788938522339,
"eval_runtime": 788.8222,
"eval_samples_per_second": 20.166,
"eval_steps_per_second": 2.521,
"eval_wer": 0.21059543454345434,
"step": 25500
},
{
"epoch": 2.18,
"learning_rate": 7.19831173260573e-05,
"loss": 0.9046,
"step": 25600
},
{
"epoch": 2.18,
"learning_rate": 7.197032742155524e-05,
"loss": 0.903,
"step": 25700
},
{
"epoch": 2.19,
"learning_rate": 7.19575375170532e-05,
"loss": 0.9111,
"step": 25800
},
{
"epoch": 2.2,
"learning_rate": 7.194474761255116e-05,
"loss": 0.9197,
"step": 25900
},
{
"epoch": 2.21,
"learning_rate": 7.193195770804911e-05,
"loss": 0.9152,
"step": 26000
},
{
"epoch": 2.21,
"eval_loss": 0.1855948269367218,
"eval_runtime": 787.8992,
"eval_samples_per_second": 20.189,
"eval_steps_per_second": 2.524,
"eval_wer": 0.20256463146314632,
"step": 26000
},
{
"epoch": 2.22,
"learning_rate": 7.191929570259208e-05,
"loss": 0.9145,
"step": 26100
},
{
"epoch": 2.23,
"learning_rate": 7.190650579809004e-05,
"loss": 0.9243,
"step": 26200
},
{
"epoch": 2.23,
"learning_rate": 7.189371589358799e-05,
"loss": 0.9118,
"step": 26300
},
{
"epoch": 2.24,
"learning_rate": 7.188092598908594e-05,
"loss": 0.8995,
"step": 26400
},
{
"epoch": 2.25,
"learning_rate": 7.186813608458389e-05,
"loss": 0.9209,
"step": 26500
},
{
"epoch": 2.25,
"eval_loss": 0.1891188770532608,
"eval_runtime": 785.3595,
"eval_samples_per_second": 20.254,
"eval_steps_per_second": 2.533,
"eval_wer": 0.21331820682068206,
"step": 26500
},
{
"epoch": 2.26,
"learning_rate": 7.185534618008184e-05,
"loss": 0.9184,
"step": 26600
},
{
"epoch": 2.27,
"learning_rate": 7.18425562755798e-05,
"loss": 0.9132,
"step": 26700
},
{
"epoch": 2.28,
"learning_rate": 7.182976637107776e-05,
"loss": 0.9208,
"step": 26800
},
{
"epoch": 2.29,
"learning_rate": 7.18169764665757e-05,
"loss": 0.9125,
"step": 26900
},
{
"epoch": 2.29,
"learning_rate": 7.180418656207366e-05,
"loss": 0.9094,
"step": 27000
},
{
"epoch": 2.29,
"eval_loss": 0.1856813281774521,
"eval_runtime": 782.4289,
"eval_samples_per_second": 20.33,
"eval_steps_per_second": 2.542,
"eval_wer": 0.20886963696369637,
"step": 27000
},
{
"epoch": 2.3,
"learning_rate": 7.179139665757162e-05,
"loss": 0.918,
"step": 27100
},
{
"epoch": 2.31,
"learning_rate": 7.177860675306957e-05,
"loss": 0.924,
"step": 27200
},
{
"epoch": 2.32,
"learning_rate": 7.176581684856753e-05,
"loss": 0.92,
"step": 27300
},
{
"epoch": 2.33,
"learning_rate": 7.175302694406547e-05,
"loss": 0.9133,
"step": 27400
},
{
"epoch": 2.34,
"learning_rate": 7.174023703956343e-05,
"loss": 0.9065,
"step": 27500
},
{
"epoch": 2.34,
"eval_loss": 0.18404491245746613,
"eval_runtime": 785.6812,
"eval_samples_per_second": 20.246,
"eval_steps_per_second": 2.532,
"eval_wer": 0.20523239823982398,
"step": 27500
},
{
"epoch": 2.35,
"learning_rate": 7.172744713506138e-05,
"loss": 0.9062,
"step": 27600
},
{
"epoch": 2.35,
"learning_rate": 7.171465723055935e-05,
"loss": 0.9149,
"step": 27700
},
{
"epoch": 2.36,
"learning_rate": 7.170186732605728e-05,
"loss": 0.9039,
"step": 27800
},
{
"epoch": 2.37,
"learning_rate": 7.168907742155525e-05,
"loss": 0.9135,
"step": 27900
},
{
"epoch": 2.38,
"learning_rate": 7.16762875170532e-05,
"loss": 0.9156,
"step": 28000
},
{
"epoch": 2.38,
"eval_loss": 0.1833326518535614,
"eval_runtime": 782.8158,
"eval_samples_per_second": 20.32,
"eval_steps_per_second": 2.541,
"eval_wer": 0.20625,
"step": 28000
},
{
"epoch": 2.39,
"learning_rate": 7.166349761255115e-05,
"loss": 0.9129,
"step": 28100
},
{
"epoch": 2.4,
"learning_rate": 7.165070770804911e-05,
"loss": 0.9133,
"step": 28200
},
{
"epoch": 2.4,
"learning_rate": 7.163791780354706e-05,
"loss": 0.9041,
"step": 28300
},
{
"epoch": 2.41,
"learning_rate": 7.162512789904501e-05,
"loss": 0.9181,
"step": 28400
},
{
"epoch": 2.42,
"learning_rate": 7.161233799454296e-05,
"loss": 0.8986,
"step": 28500
},
{
"epoch": 2.42,
"eval_loss": 0.17894455790519714,
"eval_runtime": 791.052,
"eval_samples_per_second": 20.109,
"eval_steps_per_second": 2.514,
"eval_wer": 0.20006875687568756,
"step": 28500
},
{
"epoch": 2.43,
"learning_rate": 7.159954809004093e-05,
"loss": 0.9008,
"step": 28600
},
{
"epoch": 2.44,
"learning_rate": 7.158675818553888e-05,
"loss": 0.9029,
"step": 28700
},
{
"epoch": 2.45,
"learning_rate": 7.157396828103683e-05,
"loss": 0.9021,
"step": 28800
},
{
"epoch": 2.46,
"learning_rate": 7.156117837653478e-05,
"loss": 0.9028,
"step": 28900
},
{
"epoch": 2.46,
"learning_rate": 7.154838847203274e-05,
"loss": 0.9045,
"step": 29000
},
{
"epoch": 2.46,
"eval_loss": 0.17688600718975067,
"eval_runtime": 791.874,
"eval_samples_per_second": 20.088,
"eval_steps_per_second": 2.512,
"eval_wer": 0.20222084708470847,
"step": 29000
},
{
"epoch": 2.47,
"learning_rate": 7.153559856753069e-05,
"loss": 0.9117,
"step": 29100
},
{
"epoch": 2.48,
"learning_rate": 7.152280866302864e-05,
"loss": 0.9241,
"step": 29200
},
{
"epoch": 2.49,
"learning_rate": 7.151001875852659e-05,
"loss": 0.9107,
"step": 29300
},
{
"epoch": 2.5,
"learning_rate": 7.149722885402455e-05,
"loss": 0.9083,
"step": 29400
},
{
"epoch": 2.51,
"learning_rate": 7.14844389495225e-05,
"loss": 0.9039,
"step": 29500
},
{
"epoch": 2.51,
"eval_loss": 0.18187400698661804,
"eval_runtime": 793.1992,
"eval_samples_per_second": 20.054,
"eval_steps_per_second": 2.508,
"eval_wer": 0.20726760176017603,
"step": 29500
},
{
"epoch": 2.52,
"learning_rate": 7.147164904502045e-05,
"loss": 0.8957,
"step": 29600
},
{
"epoch": 2.52,
"learning_rate": 7.145885914051842e-05,
"loss": 0.9088,
"step": 29700
},
{
"epoch": 2.53,
"learning_rate": 7.144606923601637e-05,
"loss": 0.9208,
"step": 29800
},
{
"epoch": 2.54,
"learning_rate": 7.143327933151432e-05,
"loss": 0.8972,
"step": 29900
},
{
"epoch": 2.55,
"learning_rate": 7.142048942701227e-05,
"loss": 0.9145,
"step": 30000
},
{
"epoch": 2.55,
"eval_loss": 0.182762011885643,
"eval_runtime": 776.5063,
"eval_samples_per_second": 20.485,
"eval_steps_per_second": 2.561,
"eval_wer": 0.20626375137513753,
"step": 30000
},
{
"epoch": 2.56,
"learning_rate": 7.140769952251023e-05,
"loss": 0.9034,
"step": 30100
},
{
"epoch": 2.57,
"learning_rate": 7.139490961800818e-05,
"loss": 0.9201,
"step": 30200
},
{
"epoch": 2.57,
"learning_rate": 7.138211971350613e-05,
"loss": 0.9018,
"step": 30300
},
{
"epoch": 2.58,
"learning_rate": 7.136932980900408e-05,
"loss": 0.897,
"step": 30400
},
{
"epoch": 2.59,
"learning_rate": 7.135653990450205e-05,
"loss": 0.9081,
"step": 30500
},
{
"epoch": 2.59,
"eval_loss": 0.1811380684375763,
"eval_runtime": 783.2545,
"eval_samples_per_second": 20.309,
"eval_steps_per_second": 2.539,
"eval_wer": 0.20488861386138613,
"step": 30500
},
{
"epoch": 2.6,
"learning_rate": 7.134375e-05,
"loss": 0.9103,
"step": 30600
},
{
"epoch": 2.61,
"learning_rate": 7.133096009549795e-05,
"loss": 0.9189,
"step": 30700
},
{
"epoch": 2.62,
"learning_rate": 7.131829809004092e-05,
"loss": 0.9175,
"step": 30800
},
{
"epoch": 2.63,
"learning_rate": 7.130550818553888e-05,
"loss": 0.8983,
"step": 30900
},
{
"epoch": 2.63,
"learning_rate": 7.129271828103683e-05,
"loss": 0.9252,
"step": 31000
},
{
"epoch": 2.63,
"eval_loss": 0.183289036154747,
"eval_runtime": 775.833,
"eval_samples_per_second": 20.503,
"eval_steps_per_second": 2.564,
"eval_wer": 0.20862211221122112,
"step": 31000
},
{
"epoch": 2.64,
"learning_rate": 7.127992837653478e-05,
"loss": 0.9191,
"step": 31100
},
{
"epoch": 2.65,
"learning_rate": 7.126713847203273e-05,
"loss": 0.9077,
"step": 31200
},
{
"epoch": 2.66,
"learning_rate": 7.12543485675307e-05,
"loss": 0.899,
"step": 31300
},
{
"epoch": 2.67,
"learning_rate": 7.124155866302864e-05,
"loss": 0.909,
"step": 31400
},
{
"epoch": 2.68,
"learning_rate": 7.12287687585266e-05,
"loss": 0.8957,
"step": 31500
},
{
"epoch": 2.68,
"eval_loss": 0.17950163781642914,
"eval_runtime": 779.0687,
"eval_samples_per_second": 20.418,
"eval_steps_per_second": 2.553,
"eval_wer": 0.20833333333333334,
"step": 31500
},
{
"epoch": 2.69,
"learning_rate": 7.121597885402454e-05,
"loss": 0.91,
"step": 31600
},
{
"epoch": 2.69,
"learning_rate": 7.120318894952251e-05,
"loss": 0.9041,
"step": 31700
},
{
"epoch": 2.7,
"learning_rate": 7.119039904502046e-05,
"loss": 0.899,
"step": 31800
},
{
"epoch": 2.71,
"learning_rate": 7.117760914051841e-05,
"loss": 0.9172,
"step": 31900
},
{
"epoch": 2.72,
"learning_rate": 7.116481923601637e-05,
"loss": 0.891,
"step": 32000
},
{
"epoch": 2.72,
"eval_loss": 0.18092180788516998,
"eval_runtime": 776.2498,
"eval_samples_per_second": 20.492,
"eval_steps_per_second": 2.562,
"eval_wer": 0.20578245324532454,
"step": 32000
},
{
"epoch": 2.73,
"learning_rate": 7.115202933151432e-05,
"loss": 0.8962,
"step": 32100
},
{
"epoch": 2.74,
"learning_rate": 7.113923942701227e-05,
"loss": 0.9036,
"step": 32200
},
{
"epoch": 2.74,
"learning_rate": 7.112644952251022e-05,
"loss": 0.9097,
"step": 32300
},
{
"epoch": 2.75,
"learning_rate": 7.111365961800819e-05,
"loss": 0.9188,
"step": 32400
},
{
"epoch": 2.76,
"learning_rate": 7.110086971350614e-05,
"loss": 0.9023,
"step": 32500
},
{
"epoch": 2.76,
"eval_loss": 0.1811733990907669,
"eval_runtime": 770.8233,
"eval_samples_per_second": 20.636,
"eval_steps_per_second": 2.58,
"eval_wer": 0.20608498349834983,
"step": 32500
},
{
"epoch": 2.77,
"learning_rate": 7.108807980900409e-05,
"loss": 0.9044,
"step": 32600
},
{
"epoch": 2.78,
"learning_rate": 7.107528990450204e-05,
"loss": 0.904,
"step": 32700
},
{
"epoch": 2.79,
"learning_rate": 7.10625e-05,
"loss": 0.9041,
"step": 32800
},
{
"epoch": 2.8,
"learning_rate": 7.104971009549795e-05,
"loss": 0.8982,
"step": 32900
},
{
"epoch": 2.8,
"learning_rate": 7.10369201909959e-05,
"loss": 0.8918,
"step": 33000
},
{
"epoch": 2.8,
"eval_loss": 0.17748048901557922,
"eval_runtime": 774.9097,
"eval_samples_per_second": 20.528,
"eval_steps_per_second": 2.567,
"eval_wer": 0.19973184818481848,
"step": 33000
},
{
"epoch": 2.81,
"learning_rate": 7.102413028649385e-05,
"loss": 0.9032,
"step": 33100
},
{
"epoch": 2.82,
"learning_rate": 7.101134038199181e-05,
"loss": 0.8998,
"step": 33200
},
{
"epoch": 2.83,
"learning_rate": 7.099867837653478e-05,
"loss": 0.912,
"step": 33300
},
{
"epoch": 2.84,
"learning_rate": 7.098588847203273e-05,
"loss": 0.9047,
"step": 33400
},
{
"epoch": 2.85,
"learning_rate": 7.097309856753068e-05,
"loss": 0.8852,
"step": 33500
},
{
"epoch": 2.85,
"eval_loss": 0.17901700735092163,
"eval_runtime": 786.841,
"eval_samples_per_second": 20.216,
"eval_steps_per_second": 2.528,
"eval_wer": 0.19968371837183718,
"step": 33500
},
{
"epoch": 2.86,
"learning_rate": 7.096030866302865e-05,
"loss": 0.9092,
"step": 33600
},
{
"epoch": 2.86,
"learning_rate": 7.09475187585266e-05,
"loss": 0.9125,
"step": 33700
},
{
"epoch": 2.87,
"learning_rate": 7.093472885402455e-05,
"loss": 0.9014,
"step": 33800
},
{
"epoch": 2.88,
"learning_rate": 7.09219389495225e-05,
"loss": 0.903,
"step": 33900
},
{
"epoch": 2.89,
"learning_rate": 7.090914904502046e-05,
"loss": 0.8928,
"step": 34000
},
{
"epoch": 2.89,
"eval_loss": 0.17667756974697113,
"eval_runtime": 792.1628,
"eval_samples_per_second": 20.08,
"eval_steps_per_second": 2.511,
"eval_wer": 0.20127887788778878,
"step": 34000
},
{
"epoch": 2.9,
"learning_rate": 7.089635914051841e-05,
"loss": 0.9067,
"step": 34100
},
{
"epoch": 2.91,
"learning_rate": 7.088356923601636e-05,
"loss": 0.8958,
"step": 34200
},
{
"epoch": 2.91,
"learning_rate": 7.087077933151432e-05,
"loss": 0.9184,
"step": 34300
},
{
"epoch": 2.92,
"learning_rate": 7.085798942701227e-05,
"loss": 0.9056,
"step": 34400
},
{
"epoch": 2.93,
"learning_rate": 7.084519952251023e-05,
"loss": 0.9079,
"step": 34500
},
{
"epoch": 2.93,
"eval_loss": 0.17351186275482178,
"eval_runtime": 788.8014,
"eval_samples_per_second": 20.166,
"eval_steps_per_second": 2.522,
"eval_wer": 0.19860423542354236,
"step": 34500
},
{
"epoch": 2.94,
"learning_rate": 7.083240961800818e-05,
"loss": 0.9016,
"step": 34600
},
{
"epoch": 2.95,
"learning_rate": 7.081961971350614e-05,
"loss": 0.9004,
"step": 34700
},
{
"epoch": 2.96,
"learning_rate": 7.080682980900409e-05,
"loss": 0.8833,
"step": 34800
},
{
"epoch": 2.97,
"learning_rate": 7.079403990450204e-05,
"loss": 0.8837,
"step": 34900
},
{
"epoch": 2.97,
"learning_rate": 7.078124999999999e-05,
"loss": 0.9032,
"step": 35000
},
{
"epoch": 2.97,
"eval_loss": 0.1792880892753601,
"eval_runtime": 788.4702,
"eval_samples_per_second": 20.175,
"eval_steps_per_second": 2.523,
"eval_wer": 0.2024064906490649,
"step": 35000
},
{
"epoch": 2.98,
"learning_rate": 7.076846009549795e-05,
"loss": 0.9036,
"step": 35100
},
{
"epoch": 2.99,
"learning_rate": 7.07556701909959e-05,
"loss": 0.9097,
"step": 35200
},
{
"epoch": 3.0,
"learning_rate": 7.074300818553887e-05,
"loss": 0.9154,
"step": 35300
},
{
"epoch": 3.01,
"learning_rate": 7.073021828103682e-05,
"loss": 0.8799,
"step": 35400
},
{
"epoch": 3.02,
"learning_rate": 7.071742837653479e-05,
"loss": 0.9018,
"step": 35500
},
{
"epoch": 3.02,
"eval_loss": 0.17777115106582642,
"eval_runtime": 788.8183,
"eval_samples_per_second": 20.166,
"eval_steps_per_second": 2.521,
"eval_wer": 0.20272277227722774,
"step": 35500
},
{
"epoch": 3.03,
"learning_rate": 7.070463847203274e-05,
"loss": 0.8929,
"step": 35600
},
{
"epoch": 3.03,
"learning_rate": 7.069184856753069e-05,
"loss": 0.8852,
"step": 35700
},
{
"epoch": 3.04,
"learning_rate": 7.067905866302864e-05,
"loss": 0.8866,
"step": 35800
},
{
"epoch": 3.05,
"learning_rate": 7.06662687585266e-05,
"loss": 0.9074,
"step": 35900
},
{
"epoch": 3.06,
"learning_rate": 7.065347885402455e-05,
"loss": 0.8846,
"step": 36000
},
{
"epoch": 3.06,
"eval_loss": 0.17755627632141113,
"eval_runtime": 789.1834,
"eval_samples_per_second": 20.156,
"eval_steps_per_second": 2.52,
"eval_wer": 0.20464108910891088,
"step": 36000
},
{
"epoch": 3.07,
"learning_rate": 7.06406889495225e-05,
"loss": 0.9049,
"step": 36100
},
{
"epoch": 3.08,
"learning_rate": 7.062789904502046e-05,
"loss": 0.8844,
"step": 36200
},
{
"epoch": 3.08,
"learning_rate": 7.061510914051841e-05,
"loss": 0.8892,
"step": 36300
},
{
"epoch": 3.09,
"learning_rate": 7.060231923601636e-05,
"loss": 0.8914,
"step": 36400
},
{
"epoch": 3.1,
"learning_rate": 7.058952933151431e-05,
"loss": 0.8848,
"step": 36500
},
{
"epoch": 3.1,
"eval_loss": 0.18120767176151276,
"eval_runtime": 784.2619,
"eval_samples_per_second": 20.283,
"eval_steps_per_second": 2.536,
"eval_wer": 0.20635313531353136,
"step": 36500
},
{
"epoch": 3.11,
"learning_rate": 7.05768673260573e-05,
"loss": 0.8994,
"step": 36600
},
{
"epoch": 3.12,
"learning_rate": 7.056407742155525e-05,
"loss": 0.9003,
"step": 36700
},
{
"epoch": 3.13,
"learning_rate": 7.05512875170532e-05,
"loss": 0.8945,
"step": 36800
},
{
"epoch": 3.14,
"learning_rate": 7.053849761255115e-05,
"loss": 0.9029,
"step": 36900
},
{
"epoch": 3.14,
"learning_rate": 7.052570770804911e-05,
"loss": 0.9062,
"step": 37000
},
{
"epoch": 3.14,
"eval_loss": 0.17995673418045044,
"eval_runtime": 786.3908,
"eval_samples_per_second": 20.228,
"eval_steps_per_second": 2.529,
"eval_wer": 0.20178080308030802,
"step": 37000
},
{
"epoch": 3.15,
"learning_rate": 7.051291780354706e-05,
"loss": 0.8966,
"step": 37100
},
{
"epoch": 3.16,
"learning_rate": 7.050012789904501e-05,
"loss": 0.9156,
"step": 37200
},
{
"epoch": 3.17,
"learning_rate": 7.048733799454296e-05,
"loss": 0.9032,
"step": 37300
},
{
"epoch": 3.18,
"learning_rate": 7.047454809004093e-05,
"loss": 0.8911,
"step": 37400
},
{
"epoch": 3.19,
"learning_rate": 7.046175818553888e-05,
"loss": 0.9011,
"step": 37500
},
{
"epoch": 3.19,
"eval_loss": 0.1783064901828766,
"eval_runtime": 785.3165,
"eval_samples_per_second": 20.256,
"eval_steps_per_second": 2.533,
"eval_wer": 0.20491611661166118,
"step": 37500
},
{
"epoch": 3.2,
"learning_rate": 7.044896828103683e-05,
"loss": 0.8918,
"step": 37600
},
{
"epoch": 3.2,
"learning_rate": 7.043617837653478e-05,
"loss": 0.8972,
"step": 37700
},
{
"epoch": 3.21,
"learning_rate": 7.042338847203274e-05,
"loss": 0.8857,
"step": 37800
},
{
"epoch": 3.22,
"learning_rate": 7.041059856753069e-05,
"loss": 0.9027,
"step": 37900
},
{
"epoch": 3.23,
"learning_rate": 7.039780866302864e-05,
"loss": 0.8996,
"step": 38000
},
{
"epoch": 3.23,
"eval_loss": 0.18099059164524078,
"eval_runtime": 793.5734,
"eval_samples_per_second": 20.045,
"eval_steps_per_second": 2.506,
"eval_wer": 0.20363036303630364,
"step": 38000
},
{
"epoch": 3.24,
"learning_rate": 7.038501875852659e-05,
"loss": 0.8978,
"step": 38100
},
{
"epoch": 3.25,
"learning_rate": 7.037222885402455e-05,
"loss": 0.8947,
"step": 38200
},
{
"epoch": 3.25,
"learning_rate": 7.03594389495225e-05,
"loss": 0.8957,
"step": 38300
},
{
"epoch": 3.26,
"learning_rate": 7.034664904502045e-05,
"loss": 0.8847,
"step": 38400
},
{
"epoch": 3.27,
"learning_rate": 7.033385914051842e-05,
"loss": 0.893,
"step": 38500
},
{
"epoch": 3.27,
"eval_loss": 0.18052442371845245,
"eval_runtime": 794.0582,
"eval_samples_per_second": 20.033,
"eval_steps_per_second": 2.505,
"eval_wer": 0.20563806380638064,
"step": 38500
},
{
"epoch": 3.28,
"learning_rate": 7.032106923601637e-05,
"loss": 0.905,
"step": 38600
},
{
"epoch": 3.29,
"learning_rate": 7.030827933151432e-05,
"loss": 0.8916,
"step": 38700
},
{
"epoch": 3.3,
"learning_rate": 7.029548942701227e-05,
"loss": 0.908,
"step": 38800
},
{
"epoch": 3.31,
"learning_rate": 7.028269952251023e-05,
"loss": 0.8984,
"step": 38900
},
{
"epoch": 3.31,
"learning_rate": 7.026990961800818e-05,
"loss": 0.897,
"step": 39000
},
{
"epoch": 3.31,
"eval_loss": 0.17730969190597534,
"eval_runtime": 791.0919,
"eval_samples_per_second": 20.108,
"eval_steps_per_second": 2.514,
"eval_wer": 0.20351347634763475,
"step": 39000
},
{
"epoch": 3.32,
"learning_rate": 7.025711971350613e-05,
"loss": 0.8926,
"step": 39100
},
{
"epoch": 3.33,
"learning_rate": 7.024432980900408e-05,
"loss": 0.9031,
"step": 39200
},
{
"epoch": 3.34,
"learning_rate": 7.023153990450205e-05,
"loss": 0.8943,
"step": 39300
},
{
"epoch": 3.35,
"learning_rate": 7.021875e-05,
"loss": 0.8854,
"step": 39400
},
{
"epoch": 3.36,
"learning_rate": 7.020596009549795e-05,
"loss": 0.8992,
"step": 39500
},
{
"epoch": 3.36,
"eval_loss": 0.18037009239196777,
"eval_runtime": 798.2659,
"eval_samples_per_second": 19.927,
"eval_steps_per_second": 2.492,
"eval_wer": 0.20536303630363037,
"step": 39500
},
{
"epoch": 3.37,
"learning_rate": 7.01931701909959e-05,
"loss": 0.9009,
"step": 39600
},
{
"epoch": 3.37,
"learning_rate": 7.018038028649386e-05,
"loss": 0.8927,
"step": 39700
},
{
"epoch": 3.38,
"learning_rate": 7.016759038199181e-05,
"loss": 0.8803,
"step": 39800
},
{
"epoch": 3.39,
"learning_rate": 7.015480047748976e-05,
"loss": 0.8784,
"step": 39900
},
{
"epoch": 3.4,
"learning_rate": 7.014201057298772e-05,
"loss": 0.8987,
"step": 40000
},
{
"epoch": 3.4,
"eval_loss": 0.17680302262306213,
"eval_runtime": 792.6165,
"eval_samples_per_second": 20.069,
"eval_steps_per_second": 2.509,
"eval_wer": 0.20156765676567656,
"step": 40000
},
{
"epoch": 3.4,
"step": 40000,
"total_flos": 2.0146212451775722e+20,
"train_loss": 0.0,
"train_runtime": 2.2028,
"train_samples_per_second": 0.0,
"train_steps_per_second": 0.0
}
],
"max_steps": 0,
"num_train_epochs": 0,
"total_flos": 2.0146212451775722e+20,
"trial_name": null,
"trial_params": null
}