{ "best_metric": null, "best_model_checkpoint": null, "epoch": 200.0, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.17, "learning_rate": 7.35e-05, "loss": 11.2783, "step": 100 }, { "epoch": 4.17, "eval_loss": 4.640867233276367, "eval_runtime": 5.3814, "eval_samples_per_second": 26.387, "eval_steps_per_second": 3.345, "eval_wer": 1.0, "step": 100 }, { "epoch": 8.33, "learning_rate": 7.343617021276595e-05, "loss": 3.5578, "step": 200 }, { "epoch": 8.33, "eval_loss": 3.164858341217041, "eval_runtime": 5.2371, "eval_samples_per_second": 27.114, "eval_steps_per_second": 3.437, "eval_wer": 1.0, "step": 200 }, { "epoch": 12.5, "learning_rate": 7.184042553191488e-05, "loss": 3.1279, "step": 300 }, { "epoch": 12.5, "eval_loss": 3.0335276126861572, "eval_runtime": 6.7986, "eval_samples_per_second": 20.887, "eval_steps_per_second": 2.648, "eval_wer": 1.0, "step": 300 }, { "epoch": 16.67, "learning_rate": 7.024468085106383e-05, "loss": 2.9944, "step": 400 }, { "epoch": 16.67, "eval_loss": 2.952620267868042, "eval_runtime": 7.0283, "eval_samples_per_second": 20.204, "eval_steps_per_second": 2.561, "eval_wer": 0.998256320836966, "step": 400 }, { "epoch": 20.83, "learning_rate": 6.864893617021276e-05, "loss": 2.9275, "step": 500 }, { "epoch": 20.83, "eval_loss": 2.929126501083374, "eval_runtime": 6.972, "eval_samples_per_second": 20.367, "eval_steps_per_second": 2.582, "eval_wer": 1.000871839581517, "step": 500 }, { "epoch": 25.0, "learning_rate": 6.70531914893617e-05, "loss": 2.8077, "step": 600 }, { "epoch": 25.0, "eval_loss": 2.563281536102295, "eval_runtime": 7.1264, "eval_samples_per_second": 19.926, "eval_steps_per_second": 2.526, "eval_wer": 0.9895379250217959, "step": 600 }, { "epoch": 29.17, "learning_rate": 6.545744680851063e-05, "loss": 2.4438, "step": 700 }, { "epoch": 29.17, "eval_loss": 1.904543399810791, "eval_runtime": 6.7668, "eval_samples_per_second": 20.985, "eval_steps_per_second": 2.66, "eval_wer": 0.95640802092415, "step": 700 }, { "epoch": 33.33, "learning_rate": 6.386170212765957e-05, "loss": 1.9659, "step": 800 }, { "epoch": 33.33, "eval_loss": 1.4114454984664917, "eval_runtime": 6.9861, "eval_samples_per_second": 20.326, "eval_steps_per_second": 2.577, "eval_wer": 0.7959895379250218, "step": 800 }, { "epoch": 37.5, "learning_rate": 6.226595744680851e-05, "loss": 1.7092, "step": 900 }, { "epoch": 37.5, "eval_loss": 1.2583694458007812, "eval_runtime": 6.989, "eval_samples_per_second": 20.318, "eval_steps_per_second": 2.575, "eval_wer": 0.7637314734088928, "step": 900 }, { "epoch": 41.67, "learning_rate": 6.067021276595744e-05, "loss": 1.517, "step": 1000 }, { "epoch": 41.67, "eval_loss": 1.2040127515792847, "eval_runtime": 5.4494, "eval_samples_per_second": 26.058, "eval_steps_per_second": 3.303, "eval_wer": 0.7506538796861377, "step": 1000 }, { "epoch": 45.83, "learning_rate": 5.907446808510638e-05, "loss": 1.3966, "step": 1100 }, { "epoch": 45.83, "eval_loss": 1.127307415008545, "eval_runtime": 6.9676, "eval_samples_per_second": 20.38, "eval_steps_per_second": 2.583, "eval_wer": 0.7462946817785527, "step": 1100 }, { "epoch": 50.0, "learning_rate": 5.747872340425531e-05, "loss": 1.3197, "step": 1200 }, { "epoch": 50.0, "eval_loss": 1.10543692111969, "eval_runtime": 6.7578, "eval_samples_per_second": 21.013, "eval_steps_per_second": 2.664, "eval_wer": 0.6957279860505667, "step": 1200 }, { "epoch": 54.17, "learning_rate": 5.588297872340425e-05, "loss": 1.2476, "step": 1300 }, { "epoch": 54.17, "eval_loss": 1.1034547090530396, "eval_runtime": 6.9962, "eval_samples_per_second": 20.297, "eval_steps_per_second": 2.573, "eval_wer": 0.7000871839581517, "step": 1300 }, { "epoch": 58.33, "learning_rate": 5.428723404255319e-05, "loss": 1.1796, "step": 1400 }, { "epoch": 58.33, "eval_loss": 1.0890159606933594, "eval_runtime": 6.8836, "eval_samples_per_second": 20.629, "eval_steps_per_second": 2.615, "eval_wer": 0.7096774193548387, "step": 1400 }, { "epoch": 62.5, "learning_rate": 5.269148936170212e-05, "loss": 1.1237, "step": 1500 }, { "epoch": 62.5, "eval_loss": 1.0882998704910278, "eval_runtime": 7.0292, "eval_samples_per_second": 20.202, "eval_steps_per_second": 2.561, "eval_wer": 0.7166521360069747, "step": 1500 }, { "epoch": 66.67, "learning_rate": 5.109574468085105e-05, "loss": 1.0777, "step": 1600 }, { "epoch": 66.67, "eval_loss": 1.106709599494934, "eval_runtime": 6.9652, "eval_samples_per_second": 20.387, "eval_steps_per_second": 2.584, "eval_wer": 0.7218831734960767, "step": 1600 }, { "epoch": 70.83, "learning_rate": 4.95e-05, "loss": 1.0051, "step": 1700 }, { "epoch": 70.83, "eval_loss": 1.111539363861084, "eval_runtime": 5.3056, "eval_samples_per_second": 26.764, "eval_steps_per_second": 3.393, "eval_wer": 0.7236268526591108, "step": 1700 }, { "epoch": 75.0, "learning_rate": 4.7904255319148935e-05, "loss": 0.9521, "step": 1800 }, { "epoch": 75.0, "eval_loss": 1.0866659879684448, "eval_runtime": 7.6166, "eval_samples_per_second": 18.644, "eval_steps_per_second": 2.363, "eval_wer": 0.7131647776809067, "step": 1800 }, { "epoch": 79.17, "learning_rate": 4.6308510638297865e-05, "loss": 0.9147, "step": 1900 }, { "epoch": 79.17, "eval_loss": 1.0851967334747314, "eval_runtime": 6.7698, "eval_samples_per_second": 20.975, "eval_steps_per_second": 2.659, "eval_wer": 0.7210113339145597, "step": 1900 }, { "epoch": 83.33, "learning_rate": 4.471276595744681e-05, "loss": 0.8798, "step": 2000 }, { "epoch": 83.33, "eval_loss": 1.1411497592926025, "eval_runtime": 6.711, "eval_samples_per_second": 21.159, "eval_steps_per_second": 2.682, "eval_wer": 0.7096774193548387, "step": 2000 }, { "epoch": 87.5, "learning_rate": 4.311702127659574e-05, "loss": 0.8317, "step": 2100 }, { "epoch": 87.5, "eval_loss": 1.1634019613265991, "eval_runtime": 6.8272, "eval_samples_per_second": 20.799, "eval_steps_per_second": 2.637, "eval_wer": 0.7018308631211857, "step": 2100 }, { "epoch": 91.67, "learning_rate": 4.152127659574468e-05, "loss": 0.7946, "step": 2200 }, { "epoch": 91.67, "eval_loss": 1.1620630025863647, "eval_runtime": 7.1289, "eval_samples_per_second": 19.919, "eval_steps_per_second": 2.525, "eval_wer": 0.7201394943330427, "step": 2200 }, { "epoch": 95.83, "learning_rate": 3.992553191489361e-05, "loss": 0.7594, "step": 2300 }, { "epoch": 95.83, "eval_loss": 1.1481679677963257, "eval_runtime": 7.0324, "eval_samples_per_second": 20.192, "eval_steps_per_second": 2.56, "eval_wer": 0.7035745422842197, "step": 2300 }, { "epoch": 100.0, "learning_rate": 3.834574468085106e-05, "loss": 0.729, "step": 2400 }, { "epoch": 100.0, "eval_loss": 1.1493021249771118, "eval_runtime": 6.9652, "eval_samples_per_second": 20.387, "eval_steps_per_second": 2.584, "eval_wer": 0.7061900610287707, "step": 2400 }, { "epoch": 104.17, "learning_rate": 3.675e-05, "loss": 0.7055, "step": 2500 }, { "epoch": 104.17, "eval_loss": 1.1725823879241943, "eval_runtime": 7.0084, "eval_samples_per_second": 20.261, "eval_steps_per_second": 2.568, "eval_wer": 0.6931124673060157, "step": 2500 }, { "epoch": 108.33, "learning_rate": 3.5154255319148936e-05, "loss": 0.6622, "step": 2600 }, { "epoch": 108.33, "eval_loss": 1.1937670707702637, "eval_runtime": 7.4493, "eval_samples_per_second": 19.062, "eval_steps_per_second": 2.416, "eval_wer": 0.7000871839581517, "step": 2600 }, { "epoch": 112.5, "learning_rate": 3.355851063829787e-05, "loss": 0.6583, "step": 2700 }, { "epoch": 112.5, "eval_loss": 1.1832083463668823, "eval_runtime": 6.9743, "eval_samples_per_second": 20.361, "eval_steps_per_second": 2.581, "eval_wer": 0.7149084568439407, "step": 2700 }, { "epoch": 116.67, "learning_rate": 3.1962765957446805e-05, "loss": 0.6299, "step": 2800 }, { "epoch": 116.67, "eval_loss": 1.1996266841888428, "eval_runtime": 7.2192, "eval_samples_per_second": 19.67, "eval_steps_per_second": 2.493, "eval_wer": 0.7175239755884917, "step": 2800 }, { "epoch": 120.83, "learning_rate": 3.036702127659574e-05, "loss": 0.5903, "step": 2900 }, { "epoch": 120.83, "eval_loss": 1.1986336708068848, "eval_runtime": 7.2606, "eval_samples_per_second": 19.558, "eval_steps_per_second": 2.479, "eval_wer": 0.7131647776809067, "step": 2900 }, { "epoch": 125.0, "learning_rate": 2.877127659574468e-05, "loss": 0.5816, "step": 3000 }, { "epoch": 125.0, "eval_loss": 1.1909323930740356, "eval_runtime": 6.9272, "eval_samples_per_second": 20.499, "eval_steps_per_second": 2.598, "eval_wer": 0.7009590235396687, "step": 3000 }, { "epoch": 129.17, "learning_rate": 2.7175531914893614e-05, "loss": 0.5583, "step": 3100 }, { "epoch": 129.17, "eval_loss": 1.207918405532837, "eval_runtime": 6.9368, "eval_samples_per_second": 20.471, "eval_steps_per_second": 2.595, "eval_wer": 0.6870095902353966, "step": 3100 }, { "epoch": 133.33, "learning_rate": 2.5579787234042552e-05, "loss": 0.5392, "step": 3200 }, { "epoch": 133.33, "eval_loss": 1.2108745574951172, "eval_runtime": 5.9814, "eval_samples_per_second": 23.74, "eval_steps_per_second": 3.009, "eval_wer": 0.7227550130775937, "step": 3200 }, { "epoch": 137.5, "learning_rate": 2.398404255319149e-05, "loss": 0.5412, "step": 3300 }, { "epoch": 137.5, "eval_loss": 1.235259771347046, "eval_runtime": 6.7958, "eval_samples_per_second": 20.895, "eval_steps_per_second": 2.649, "eval_wer": 0.7244986922406277, "step": 3300 }, { "epoch": 141.67, "learning_rate": 2.2388297872340424e-05, "loss": 0.5136, "step": 3400 }, { "epoch": 141.67, "eval_loss": 1.2390460968017578, "eval_runtime": 6.7486, "eval_samples_per_second": 21.042, "eval_steps_per_second": 2.667, "eval_wer": 0.7253705318221447, "step": 3400 }, { "epoch": 145.83, "learning_rate": 2.079255319148936e-05, "loss": 0.5007, "step": 3500 }, { "epoch": 145.83, "eval_loss": 1.227264165878296, "eval_runtime": 6.7331, "eval_samples_per_second": 21.09, "eval_steps_per_second": 2.673, "eval_wer": 0.7122929380993898, "step": 3500 }, { "epoch": 150.0, "learning_rate": 1.9196808510638296e-05, "loss": 0.4883, "step": 3600 }, { "epoch": 150.0, "eval_loss": 1.2772815227508545, "eval_runtime": 6.6223, "eval_samples_per_second": 21.443, "eval_steps_per_second": 2.718, "eval_wer": 0.7288578901482128, "step": 3600 }, { "epoch": 154.17, "learning_rate": 1.7601063829787233e-05, "loss": 0.4835, "step": 3700 }, { "epoch": 154.17, "eval_loss": 1.2677749395370483, "eval_runtime": 6.8418, "eval_samples_per_second": 20.755, "eval_steps_per_second": 2.631, "eval_wer": 0.7288578901482128, "step": 3700 }, { "epoch": 158.33, "learning_rate": 1.600531914893617e-05, "loss": 0.4568, "step": 3800 }, { "epoch": 158.33, "eval_loss": 1.2592484951019287, "eval_runtime": 6.8949, "eval_samples_per_second": 20.595, "eval_steps_per_second": 2.611, "eval_wer": 0.7349607672188317, "step": 3800 }, { "epoch": 162.5, "learning_rate": 1.4409574468085105e-05, "loss": 0.4525, "step": 3900 }, { "epoch": 162.5, "eval_loss": 1.270469069480896, "eval_runtime": 5.5893, "eval_samples_per_second": 25.406, "eval_steps_per_second": 3.22, "eval_wer": 0.7253705318221447, "step": 3900 }, { "epoch": 166.67, "learning_rate": 1.2813829787234041e-05, "loss": 0.4379, "step": 4000 }, { "epoch": 166.67, "eval_loss": 1.2717314958572388, "eval_runtime": 6.6834, "eval_samples_per_second": 21.247, "eval_steps_per_second": 2.693, "eval_wer": 0.7306015693112468, "step": 4000 }, { "epoch": 170.83, "learning_rate": 1.1218085106382979e-05, "loss": 0.4198, "step": 4100 }, { "epoch": 170.83, "eval_loss": 1.2617682218551636, "eval_runtime": 7.1514, "eval_samples_per_second": 19.856, "eval_steps_per_second": 2.517, "eval_wer": 0.7218831734960767, "step": 4100 }, { "epoch": 175.0, "learning_rate": 9.622340425531914e-06, "loss": 0.4216, "step": 4200 }, { "epoch": 175.0, "eval_loss": 1.2908614873886108, "eval_runtime": 7.5161, "eval_samples_per_second": 18.893, "eval_steps_per_second": 2.395, "eval_wer": 0.7157802964254577, "step": 4200 }, { "epoch": 179.17, "learning_rate": 8.02659574468085e-06, "loss": 0.4305, "step": 4300 }, { "epoch": 179.17, "eval_loss": 1.2808016538619995, "eval_runtime": 7.0468, "eval_samples_per_second": 20.151, "eval_steps_per_second": 2.554, "eval_wer": 0.7166521360069747, "step": 4300 }, { "epoch": 183.33, "learning_rate": 6.446808510638297e-06, "loss": 0.399, "step": 4400 }, { "epoch": 183.33, "eval_loss": 1.2750086784362793, "eval_runtime": 6.8372, "eval_samples_per_second": 20.769, "eval_steps_per_second": 2.633, "eval_wer": 0.7192676547515258, "step": 4400 }, { "epoch": 187.5, "learning_rate": 4.851063829787233e-06, "loss": 0.3937, "step": 4500 }, { "epoch": 187.5, "eval_loss": 1.271910309791565, "eval_runtime": 7.0869, "eval_samples_per_second": 20.037, "eval_steps_per_second": 2.54, "eval_wer": 0.7149084568439407, "step": 4500 }, { "epoch": 191.67, "learning_rate": 3.25531914893617e-06, "loss": 0.3905, "step": 4600 }, { "epoch": 191.67, "eval_loss": 1.2815755605697632, "eval_runtime": 5.4594, "eval_samples_per_second": 26.01, "eval_steps_per_second": 3.297, "eval_wer": 0.7157802964254577, "step": 4600 }, { "epoch": 195.83, "learning_rate": 1.6595744680851062e-06, "loss": 0.3892, "step": 4700 }, { "epoch": 195.83, "eval_loss": 1.295116901397705, "eval_runtime": 7.0588, "eval_samples_per_second": 20.117, "eval_steps_per_second": 2.55, "eval_wer": 0.7210113339145597, "step": 4700 }, { "epoch": 200.0, "learning_rate": 6.382978723404255e-08, "loss": 0.3932, "step": 4800 }, { "epoch": 200.0, "eval_loss": 1.2923693656921387, "eval_runtime": 7.179, "eval_samples_per_second": 19.78, "eval_steps_per_second": 2.507, "eval_wer": 0.7201394943330427, "step": 4800 }, { "epoch": 200.0, "step": 4800, "total_flos": 1.0000910465788367e+19, "train_loss": 1.2620406293869018, "train_runtime": 6643.3653, "train_samples_per_second": 11.38, "train_steps_per_second": 0.723 } ], "max_steps": 4800, "num_train_epochs": 200, "total_flos": 1.0000910465788367e+19, "trial_name": null, "trial_params": null }