xls-r-300m-ur-cv7 / trainer_state.json
HarrisDePerceptron's picture
End of training
9834419
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 200.0,
"global_step": 4800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 4.17,
"learning_rate": 7.35e-05,
"loss": 11.2783,
"step": 100
},
{
"epoch": 4.17,
"eval_loss": 4.640867233276367,
"eval_runtime": 5.3814,
"eval_samples_per_second": 26.387,
"eval_steps_per_second": 3.345,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 8.33,
"learning_rate": 7.343617021276595e-05,
"loss": 3.5578,
"step": 200
},
{
"epoch": 8.33,
"eval_loss": 3.164858341217041,
"eval_runtime": 5.2371,
"eval_samples_per_second": 27.114,
"eval_steps_per_second": 3.437,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 12.5,
"learning_rate": 7.184042553191488e-05,
"loss": 3.1279,
"step": 300
},
{
"epoch": 12.5,
"eval_loss": 3.0335276126861572,
"eval_runtime": 6.7986,
"eval_samples_per_second": 20.887,
"eval_steps_per_second": 2.648,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 16.67,
"learning_rate": 7.024468085106383e-05,
"loss": 2.9944,
"step": 400
},
{
"epoch": 16.67,
"eval_loss": 2.952620267868042,
"eval_runtime": 7.0283,
"eval_samples_per_second": 20.204,
"eval_steps_per_second": 2.561,
"eval_wer": 0.998256320836966,
"step": 400
},
{
"epoch": 20.83,
"learning_rate": 6.864893617021276e-05,
"loss": 2.9275,
"step": 500
},
{
"epoch": 20.83,
"eval_loss": 2.929126501083374,
"eval_runtime": 6.972,
"eval_samples_per_second": 20.367,
"eval_steps_per_second": 2.582,
"eval_wer": 1.000871839581517,
"step": 500
},
{
"epoch": 25.0,
"learning_rate": 6.70531914893617e-05,
"loss": 2.8077,
"step": 600
},
{
"epoch": 25.0,
"eval_loss": 2.563281536102295,
"eval_runtime": 7.1264,
"eval_samples_per_second": 19.926,
"eval_steps_per_second": 2.526,
"eval_wer": 0.9895379250217959,
"step": 600
},
{
"epoch": 29.17,
"learning_rate": 6.545744680851063e-05,
"loss": 2.4438,
"step": 700
},
{
"epoch": 29.17,
"eval_loss": 1.904543399810791,
"eval_runtime": 6.7668,
"eval_samples_per_second": 20.985,
"eval_steps_per_second": 2.66,
"eval_wer": 0.95640802092415,
"step": 700
},
{
"epoch": 33.33,
"learning_rate": 6.386170212765957e-05,
"loss": 1.9659,
"step": 800
},
{
"epoch": 33.33,
"eval_loss": 1.4114454984664917,
"eval_runtime": 6.9861,
"eval_samples_per_second": 20.326,
"eval_steps_per_second": 2.577,
"eval_wer": 0.7959895379250218,
"step": 800
},
{
"epoch": 37.5,
"learning_rate": 6.226595744680851e-05,
"loss": 1.7092,
"step": 900
},
{
"epoch": 37.5,
"eval_loss": 1.2583694458007812,
"eval_runtime": 6.989,
"eval_samples_per_second": 20.318,
"eval_steps_per_second": 2.575,
"eval_wer": 0.7637314734088928,
"step": 900
},
{
"epoch": 41.67,
"learning_rate": 6.067021276595744e-05,
"loss": 1.517,
"step": 1000
},
{
"epoch": 41.67,
"eval_loss": 1.2040127515792847,
"eval_runtime": 5.4494,
"eval_samples_per_second": 26.058,
"eval_steps_per_second": 3.303,
"eval_wer": 0.7506538796861377,
"step": 1000
},
{
"epoch": 45.83,
"learning_rate": 5.907446808510638e-05,
"loss": 1.3966,
"step": 1100
},
{
"epoch": 45.83,
"eval_loss": 1.127307415008545,
"eval_runtime": 6.9676,
"eval_samples_per_second": 20.38,
"eval_steps_per_second": 2.583,
"eval_wer": 0.7462946817785527,
"step": 1100
},
{
"epoch": 50.0,
"learning_rate": 5.747872340425531e-05,
"loss": 1.3197,
"step": 1200
},
{
"epoch": 50.0,
"eval_loss": 1.10543692111969,
"eval_runtime": 6.7578,
"eval_samples_per_second": 21.013,
"eval_steps_per_second": 2.664,
"eval_wer": 0.6957279860505667,
"step": 1200
},
{
"epoch": 54.17,
"learning_rate": 5.588297872340425e-05,
"loss": 1.2476,
"step": 1300
},
{
"epoch": 54.17,
"eval_loss": 1.1034547090530396,
"eval_runtime": 6.9962,
"eval_samples_per_second": 20.297,
"eval_steps_per_second": 2.573,
"eval_wer": 0.7000871839581517,
"step": 1300
},
{
"epoch": 58.33,
"learning_rate": 5.428723404255319e-05,
"loss": 1.1796,
"step": 1400
},
{
"epoch": 58.33,
"eval_loss": 1.0890159606933594,
"eval_runtime": 6.8836,
"eval_samples_per_second": 20.629,
"eval_steps_per_second": 2.615,
"eval_wer": 0.7096774193548387,
"step": 1400
},
{
"epoch": 62.5,
"learning_rate": 5.269148936170212e-05,
"loss": 1.1237,
"step": 1500
},
{
"epoch": 62.5,
"eval_loss": 1.0882998704910278,
"eval_runtime": 7.0292,
"eval_samples_per_second": 20.202,
"eval_steps_per_second": 2.561,
"eval_wer": 0.7166521360069747,
"step": 1500
},
{
"epoch": 66.67,
"learning_rate": 5.109574468085105e-05,
"loss": 1.0777,
"step": 1600
},
{
"epoch": 66.67,
"eval_loss": 1.106709599494934,
"eval_runtime": 6.9652,
"eval_samples_per_second": 20.387,
"eval_steps_per_second": 2.584,
"eval_wer": 0.7218831734960767,
"step": 1600
},
{
"epoch": 70.83,
"learning_rate": 4.95e-05,
"loss": 1.0051,
"step": 1700
},
{
"epoch": 70.83,
"eval_loss": 1.111539363861084,
"eval_runtime": 5.3056,
"eval_samples_per_second": 26.764,
"eval_steps_per_second": 3.393,
"eval_wer": 0.7236268526591108,
"step": 1700
},
{
"epoch": 75.0,
"learning_rate": 4.7904255319148935e-05,
"loss": 0.9521,
"step": 1800
},
{
"epoch": 75.0,
"eval_loss": 1.0866659879684448,
"eval_runtime": 7.6166,
"eval_samples_per_second": 18.644,
"eval_steps_per_second": 2.363,
"eval_wer": 0.7131647776809067,
"step": 1800
},
{
"epoch": 79.17,
"learning_rate": 4.6308510638297865e-05,
"loss": 0.9147,
"step": 1900
},
{
"epoch": 79.17,
"eval_loss": 1.0851967334747314,
"eval_runtime": 6.7698,
"eval_samples_per_second": 20.975,
"eval_steps_per_second": 2.659,
"eval_wer": 0.7210113339145597,
"step": 1900
},
{
"epoch": 83.33,
"learning_rate": 4.471276595744681e-05,
"loss": 0.8798,
"step": 2000
},
{
"epoch": 83.33,
"eval_loss": 1.1411497592926025,
"eval_runtime": 6.711,
"eval_samples_per_second": 21.159,
"eval_steps_per_second": 2.682,
"eval_wer": 0.7096774193548387,
"step": 2000
},
{
"epoch": 87.5,
"learning_rate": 4.311702127659574e-05,
"loss": 0.8317,
"step": 2100
},
{
"epoch": 87.5,
"eval_loss": 1.1634019613265991,
"eval_runtime": 6.8272,
"eval_samples_per_second": 20.799,
"eval_steps_per_second": 2.637,
"eval_wer": 0.7018308631211857,
"step": 2100
},
{
"epoch": 91.67,
"learning_rate": 4.152127659574468e-05,
"loss": 0.7946,
"step": 2200
},
{
"epoch": 91.67,
"eval_loss": 1.1620630025863647,
"eval_runtime": 7.1289,
"eval_samples_per_second": 19.919,
"eval_steps_per_second": 2.525,
"eval_wer": 0.7201394943330427,
"step": 2200
},
{
"epoch": 95.83,
"learning_rate": 3.992553191489361e-05,
"loss": 0.7594,
"step": 2300
},
{
"epoch": 95.83,
"eval_loss": 1.1481679677963257,
"eval_runtime": 7.0324,
"eval_samples_per_second": 20.192,
"eval_steps_per_second": 2.56,
"eval_wer": 0.7035745422842197,
"step": 2300
},
{
"epoch": 100.0,
"learning_rate": 3.834574468085106e-05,
"loss": 0.729,
"step": 2400
},
{
"epoch": 100.0,
"eval_loss": 1.1493021249771118,
"eval_runtime": 6.9652,
"eval_samples_per_second": 20.387,
"eval_steps_per_second": 2.584,
"eval_wer": 0.7061900610287707,
"step": 2400
},
{
"epoch": 104.17,
"learning_rate": 3.675e-05,
"loss": 0.7055,
"step": 2500
},
{
"epoch": 104.17,
"eval_loss": 1.1725823879241943,
"eval_runtime": 7.0084,
"eval_samples_per_second": 20.261,
"eval_steps_per_second": 2.568,
"eval_wer": 0.6931124673060157,
"step": 2500
},
{
"epoch": 108.33,
"learning_rate": 3.5154255319148936e-05,
"loss": 0.6622,
"step": 2600
},
{
"epoch": 108.33,
"eval_loss": 1.1937670707702637,
"eval_runtime": 7.4493,
"eval_samples_per_second": 19.062,
"eval_steps_per_second": 2.416,
"eval_wer": 0.7000871839581517,
"step": 2600
},
{
"epoch": 112.5,
"learning_rate": 3.355851063829787e-05,
"loss": 0.6583,
"step": 2700
},
{
"epoch": 112.5,
"eval_loss": 1.1832083463668823,
"eval_runtime": 6.9743,
"eval_samples_per_second": 20.361,
"eval_steps_per_second": 2.581,
"eval_wer": 0.7149084568439407,
"step": 2700
},
{
"epoch": 116.67,
"learning_rate": 3.1962765957446805e-05,
"loss": 0.6299,
"step": 2800
},
{
"epoch": 116.67,
"eval_loss": 1.1996266841888428,
"eval_runtime": 7.2192,
"eval_samples_per_second": 19.67,
"eval_steps_per_second": 2.493,
"eval_wer": 0.7175239755884917,
"step": 2800
},
{
"epoch": 120.83,
"learning_rate": 3.036702127659574e-05,
"loss": 0.5903,
"step": 2900
},
{
"epoch": 120.83,
"eval_loss": 1.1986336708068848,
"eval_runtime": 7.2606,
"eval_samples_per_second": 19.558,
"eval_steps_per_second": 2.479,
"eval_wer": 0.7131647776809067,
"step": 2900
},
{
"epoch": 125.0,
"learning_rate": 2.877127659574468e-05,
"loss": 0.5816,
"step": 3000
},
{
"epoch": 125.0,
"eval_loss": 1.1909323930740356,
"eval_runtime": 6.9272,
"eval_samples_per_second": 20.499,
"eval_steps_per_second": 2.598,
"eval_wer": 0.7009590235396687,
"step": 3000
},
{
"epoch": 129.17,
"learning_rate": 2.7175531914893614e-05,
"loss": 0.5583,
"step": 3100
},
{
"epoch": 129.17,
"eval_loss": 1.207918405532837,
"eval_runtime": 6.9368,
"eval_samples_per_second": 20.471,
"eval_steps_per_second": 2.595,
"eval_wer": 0.6870095902353966,
"step": 3100
},
{
"epoch": 133.33,
"learning_rate": 2.5579787234042552e-05,
"loss": 0.5392,
"step": 3200
},
{
"epoch": 133.33,
"eval_loss": 1.2108745574951172,
"eval_runtime": 5.9814,
"eval_samples_per_second": 23.74,
"eval_steps_per_second": 3.009,
"eval_wer": 0.7227550130775937,
"step": 3200
},
{
"epoch": 137.5,
"learning_rate": 2.398404255319149e-05,
"loss": 0.5412,
"step": 3300
},
{
"epoch": 137.5,
"eval_loss": 1.235259771347046,
"eval_runtime": 6.7958,
"eval_samples_per_second": 20.895,
"eval_steps_per_second": 2.649,
"eval_wer": 0.7244986922406277,
"step": 3300
},
{
"epoch": 141.67,
"learning_rate": 2.2388297872340424e-05,
"loss": 0.5136,
"step": 3400
},
{
"epoch": 141.67,
"eval_loss": 1.2390460968017578,
"eval_runtime": 6.7486,
"eval_samples_per_second": 21.042,
"eval_steps_per_second": 2.667,
"eval_wer": 0.7253705318221447,
"step": 3400
},
{
"epoch": 145.83,
"learning_rate": 2.079255319148936e-05,
"loss": 0.5007,
"step": 3500
},
{
"epoch": 145.83,
"eval_loss": 1.227264165878296,
"eval_runtime": 6.7331,
"eval_samples_per_second": 21.09,
"eval_steps_per_second": 2.673,
"eval_wer": 0.7122929380993898,
"step": 3500
},
{
"epoch": 150.0,
"learning_rate": 1.9196808510638296e-05,
"loss": 0.4883,
"step": 3600
},
{
"epoch": 150.0,
"eval_loss": 1.2772815227508545,
"eval_runtime": 6.6223,
"eval_samples_per_second": 21.443,
"eval_steps_per_second": 2.718,
"eval_wer": 0.7288578901482128,
"step": 3600
},
{
"epoch": 154.17,
"learning_rate": 1.7601063829787233e-05,
"loss": 0.4835,
"step": 3700
},
{
"epoch": 154.17,
"eval_loss": 1.2677749395370483,
"eval_runtime": 6.8418,
"eval_samples_per_second": 20.755,
"eval_steps_per_second": 2.631,
"eval_wer": 0.7288578901482128,
"step": 3700
},
{
"epoch": 158.33,
"learning_rate": 1.600531914893617e-05,
"loss": 0.4568,
"step": 3800
},
{
"epoch": 158.33,
"eval_loss": 1.2592484951019287,
"eval_runtime": 6.8949,
"eval_samples_per_second": 20.595,
"eval_steps_per_second": 2.611,
"eval_wer": 0.7349607672188317,
"step": 3800
},
{
"epoch": 162.5,
"learning_rate": 1.4409574468085105e-05,
"loss": 0.4525,
"step": 3900
},
{
"epoch": 162.5,
"eval_loss": 1.270469069480896,
"eval_runtime": 5.5893,
"eval_samples_per_second": 25.406,
"eval_steps_per_second": 3.22,
"eval_wer": 0.7253705318221447,
"step": 3900
},
{
"epoch": 166.67,
"learning_rate": 1.2813829787234041e-05,
"loss": 0.4379,
"step": 4000
},
{
"epoch": 166.67,
"eval_loss": 1.2717314958572388,
"eval_runtime": 6.6834,
"eval_samples_per_second": 21.247,
"eval_steps_per_second": 2.693,
"eval_wer": 0.7306015693112468,
"step": 4000
},
{
"epoch": 170.83,
"learning_rate": 1.1218085106382979e-05,
"loss": 0.4198,
"step": 4100
},
{
"epoch": 170.83,
"eval_loss": 1.2617682218551636,
"eval_runtime": 7.1514,
"eval_samples_per_second": 19.856,
"eval_steps_per_second": 2.517,
"eval_wer": 0.7218831734960767,
"step": 4100
},
{
"epoch": 175.0,
"learning_rate": 9.622340425531914e-06,
"loss": 0.4216,
"step": 4200
},
{
"epoch": 175.0,
"eval_loss": 1.2908614873886108,
"eval_runtime": 7.5161,
"eval_samples_per_second": 18.893,
"eval_steps_per_second": 2.395,
"eval_wer": 0.7157802964254577,
"step": 4200
},
{
"epoch": 179.17,
"learning_rate": 8.02659574468085e-06,
"loss": 0.4305,
"step": 4300
},
{
"epoch": 179.17,
"eval_loss": 1.2808016538619995,
"eval_runtime": 7.0468,
"eval_samples_per_second": 20.151,
"eval_steps_per_second": 2.554,
"eval_wer": 0.7166521360069747,
"step": 4300
},
{
"epoch": 183.33,
"learning_rate": 6.446808510638297e-06,
"loss": 0.399,
"step": 4400
},
{
"epoch": 183.33,
"eval_loss": 1.2750086784362793,
"eval_runtime": 6.8372,
"eval_samples_per_second": 20.769,
"eval_steps_per_second": 2.633,
"eval_wer": 0.7192676547515258,
"step": 4400
},
{
"epoch": 187.5,
"learning_rate": 4.851063829787233e-06,
"loss": 0.3937,
"step": 4500
},
{
"epoch": 187.5,
"eval_loss": 1.271910309791565,
"eval_runtime": 7.0869,
"eval_samples_per_second": 20.037,
"eval_steps_per_second": 2.54,
"eval_wer": 0.7149084568439407,
"step": 4500
},
{
"epoch": 191.67,
"learning_rate": 3.25531914893617e-06,
"loss": 0.3905,
"step": 4600
},
{
"epoch": 191.67,
"eval_loss": 1.2815755605697632,
"eval_runtime": 5.4594,
"eval_samples_per_second": 26.01,
"eval_steps_per_second": 3.297,
"eval_wer": 0.7157802964254577,
"step": 4600
},
{
"epoch": 195.83,
"learning_rate": 1.6595744680851062e-06,
"loss": 0.3892,
"step": 4700
},
{
"epoch": 195.83,
"eval_loss": 1.295116901397705,
"eval_runtime": 7.0588,
"eval_samples_per_second": 20.117,
"eval_steps_per_second": 2.55,
"eval_wer": 0.7210113339145597,
"step": 4700
},
{
"epoch": 200.0,
"learning_rate": 6.382978723404255e-08,
"loss": 0.3932,
"step": 4800
},
{
"epoch": 200.0,
"eval_loss": 1.2923693656921387,
"eval_runtime": 7.179,
"eval_samples_per_second": 19.78,
"eval_steps_per_second": 2.507,
"eval_wer": 0.7201394943330427,
"step": 4800
},
{
"epoch": 200.0,
"step": 4800,
"total_flos": 1.0000910465788367e+19,
"train_loss": 1.2620406293869018,
"train_runtime": 6643.3653,
"train_samples_per_second": 11.38,
"train_steps_per_second": 0.723
}
],
"max_steps": 4800,
"num_train_epochs": 200,
"total_flos": 1.0000910465788367e+19,
"trial_name": null,
"trial_params": null
}