{ "best_metric": null, "best_model_checkpoint": null, "epoch": 119.99875156054931, "global_step": 24000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0, "learning_rate": 7.960000000000001e-05, "loss": 7.6701, "step": 400 }, { "epoch": 2.0, "eval_loss": 2.900460958480835, "eval_runtime": 252.4053, "eval_samples_per_second": 20.653, "eval_steps_per_second": 2.583, "eval_wer": 1.0, "step": 400 }, { "epoch": 4.0, "learning_rate": 9.873191489361703e-05, "loss": 1.0784, "step": 800 }, { "epoch": 4.0, "eval_loss": 0.4933229088783264, "eval_runtime": 254.2522, "eval_samples_per_second": 20.503, "eval_steps_per_second": 2.564, "eval_wer": 0.46976841118086854, "step": 800 }, { "epoch": 6.0, "learning_rate": 9.702978723404256e-05, "loss": 0.3128, "step": 1200 }, { "epoch": 6.0, "eval_loss": 0.3993886709213257, "eval_runtime": 252.398, "eval_samples_per_second": 20.654, "eval_steps_per_second": 2.583, "eval_wer": 0.37059421158204603, "step": 1200 }, { "epoch": 8.0, "learning_rate": 9.532765957446809e-05, "loss": 0.2158, "step": 1600 }, { "epoch": 8.0, "eval_loss": 0.3813767731189728, "eval_runtime": 260.377, "eval_samples_per_second": 20.021, "eval_steps_per_second": 2.504, "eval_wer": 0.3755438038919426, "step": 1600 }, { "epoch": 10.0, "learning_rate": 9.362553191489361e-05, "loss": 0.1716, "step": 2000 }, { "epoch": 10.0, "eval_loss": 0.3713369071483612, "eval_runtime": 256.1576, "eval_samples_per_second": 20.351, "eval_steps_per_second": 2.545, "eval_wer": 0.33745799359158046, "step": 2000 }, { "epoch": 12.0, "learning_rate": 9.192340425531915e-05, "loss": 0.1419, "step": 2400 }, { "epoch": 12.0, "eval_loss": 0.3995266556739807, "eval_runtime": 252.1996, "eval_samples_per_second": 20.67, "eval_steps_per_second": 2.585, "eval_wer": 0.35183786177612214, "step": 2400 }, { "epoch": 14.0, "learning_rate": 9.022127659574468e-05, "loss": 0.1236, "step": 2800 }, { "epoch": 14.0, "eval_loss": 0.41960370540618896, "eval_runtime": 256.7174, "eval_samples_per_second": 20.306, "eval_steps_per_second": 2.54, "eval_wer": 0.34042774897751843, "step": 2800 }, { "epoch": 16.0, "learning_rate": 8.851914893617021e-05, "loss": 0.1136, "step": 3200 }, { "epoch": 16.0, "eval_loss": 0.3961251676082611, "eval_runtime": 257.0042, "eval_samples_per_second": 20.284, "eval_steps_per_second": 2.537, "eval_wer": 0.32029072342199183, "step": 3200 }, { "epoch": 18.0, "learning_rate": 8.682127659574468e-05, "loss": 0.1018, "step": 3600 }, { "epoch": 18.0, "eval_loss": 0.4284366965293884, "eval_runtime": 253.1831, "eval_samples_per_second": 20.59, "eval_steps_per_second": 2.575, "eval_wer": 0.3373798421340558, "step": 3600 }, { "epoch": 20.0, "learning_rate": 8.511914893617021e-05, "loss": 0.0948, "step": 4000 }, { "epoch": 20.0, "eval_loss": 0.4246082305908203, "eval_runtime": 250.1867, "eval_samples_per_second": 20.836, "eval_steps_per_second": 2.606, "eval_wer": 0.32953864589574594, "step": 4000 }, { "epoch": 22.0, "learning_rate": 8.341702127659575e-05, "loss": 0.0837, "step": 4400 }, { "epoch": 22.0, "eval_loss": 0.41016092896461487, "eval_runtime": 255.5501, "eval_samples_per_second": 20.399, "eval_steps_per_second": 2.551, "eval_wer": 0.31726886706437074, "step": 4400 }, { "epoch": 24.0, "learning_rate": 8.171489361702128e-05, "loss": 0.0823, "step": 4800 }, { "epoch": 24.0, "eval_loss": 0.42129427194595337, "eval_runtime": 254.0415, "eval_samples_per_second": 20.52, "eval_steps_per_second": 2.567, "eval_wer": 0.3245109021283247, "step": 4800 }, { "epoch": 26.0, "learning_rate": 8.001276595744681e-05, "loss": 0.079, "step": 5200 }, { "epoch": 26.0, "eval_loss": 0.4289919435977936, "eval_runtime": 256.8225, "eval_samples_per_second": 20.298, "eval_steps_per_second": 2.539, "eval_wer": 0.3159923932581343, "step": 5200 }, { "epoch": 28.0, "learning_rate": 7.831063829787234e-05, "loss": 0.0736, "step": 5600 }, { "epoch": 28.0, "eval_loss": 0.45376530289649963, "eval_runtime": 250.495, "eval_samples_per_second": 20.811, "eval_steps_per_second": 2.603, "eval_wer": 0.3086982572224972, "step": 5600 }, { "epoch": 30.0, "learning_rate": 7.66127659574468e-05, "loss": 0.0702, "step": 6000 }, { "epoch": 30.0, "eval_loss": 0.4636918306350708, "eval_runtime": 251.4184, "eval_samples_per_second": 20.734, "eval_steps_per_second": 2.593, "eval_wer": 0.31151170969338576, "step": 6000 }, { "epoch": 32.0, "learning_rate": 7.491063829787235e-05, "loss": 0.0668, "step": 6400 }, { "epoch": 32.0, "eval_loss": 0.4827902615070343, "eval_runtime": 248.7074, "eval_samples_per_second": 20.96, "eval_steps_per_second": 2.622, "eval_wer": 0.3185192903847657, "step": 6400 }, { "epoch": 34.0, "learning_rate": 7.320851063829788e-05, "loss": 0.0646, "step": 6800 }, { "epoch": 34.0, "eval_loss": 0.4427362084388733, "eval_runtime": 252.8917, "eval_samples_per_second": 20.614, "eval_steps_per_second": 2.578, "eval_wer": 0.311381457264178, "step": 6800 }, { "epoch": 36.0, "learning_rate": 7.15063829787234e-05, "loss": 0.06, "step": 7200 }, { "epoch": 36.0, "eval_loss": 0.4571715295314789, "eval_runtime": 252.2048, "eval_samples_per_second": 20.67, "eval_steps_per_second": 2.585, "eval_wer": 0.311615911636752, "step": 7200 }, { "epoch": 38.0, "learning_rate": 6.980425531914893e-05, "loss": 0.0584, "step": 7600 }, { "epoch": 38.0, "eval_loss": 0.4944392740726471, "eval_runtime": 251.4845, "eval_samples_per_second": 20.729, "eval_steps_per_second": 2.593, "eval_wer": 0.30804699507645816, "step": 7600 }, { "epoch": 40.0, "learning_rate": 6.810212765957446e-05, "loss": 0.0545, "step": 8000 }, { "epoch": 40.0, "eval_loss": 0.4599160850048065, "eval_runtime": 253.7962, "eval_samples_per_second": 20.54, "eval_steps_per_second": 2.569, "eval_wer": 0.3099747310287337, "step": 8000 }, { "epoch": 42.0, "learning_rate": 6.64e-05, "loss": 0.0536, "step": 8400 }, { "epoch": 42.0, "eval_loss": 0.4586937427520752, "eval_runtime": 256.4427, "eval_samples_per_second": 20.328, "eval_steps_per_second": 2.542, "eval_wer": 0.30338395811081875, "step": 8400 }, { "epoch": 44.0, "learning_rate": 6.469787234042553e-05, "loss": 0.0506, "step": 8800 }, { "epoch": 44.0, "eval_loss": 0.46839669346809387, "eval_runtime": 253.5984, "eval_samples_per_second": 20.556, "eval_steps_per_second": 2.571, "eval_wer": 0.3020814338187407, "step": 8800 }, { "epoch": 46.0, "learning_rate": 6.299574468085107e-05, "loss": 0.0504, "step": 9200 }, { "epoch": 46.0, "eval_loss": 0.4694036841392517, "eval_runtime": 252.7537, "eval_samples_per_second": 20.625, "eval_steps_per_second": 2.58, "eval_wer": 0.30710917758616196, "step": 9200 }, { "epoch": 48.0, "learning_rate": 6.12936170212766e-05, "loss": 0.048, "step": 9600 }, { "epoch": 48.0, "eval_loss": 0.4677378535270691, "eval_runtime": 252.9941, "eval_samples_per_second": 20.605, "eval_steps_per_second": 2.577, "eval_wer": 0.2971578919946857, "step": 9600 }, { "epoch": 50.0, "learning_rate": 5.9591489361702134e-05, "loss": 0.045, "step": 10000 }, { "epoch": 50.0, "eval_loss": 0.46043330430984497, "eval_runtime": 249.2435, "eval_samples_per_second": 20.915, "eval_steps_per_second": 2.616, "eval_wer": 0.29778310365488314, "step": 10000 }, { "epoch": 52.0, "learning_rate": 5.7893617021276604e-05, "loss": 0.0436, "step": 10400 }, { "epoch": 52.0, "eval_loss": 0.4766680598258972, "eval_runtime": 251.6774, "eval_samples_per_second": 20.713, "eval_steps_per_second": 2.591, "eval_wer": 0.29731419490973504, "step": 10400 }, { "epoch": 54.0, "learning_rate": 5.619148936170213e-05, "loss": 0.0422, "step": 10800 }, { "epoch": 54.0, "eval_loss": 0.4668702185153961, "eval_runtime": 252.6671, "eval_samples_per_second": 20.632, "eval_steps_per_second": 2.58, "eval_wer": 0.2975486492823091, "step": 10800 }, { "epoch": 56.0, "learning_rate": 5.448936170212766e-05, "loss": 0.0404, "step": 11200 }, { "epoch": 56.0, "eval_loss": 0.4487351179122925, "eval_runtime": 250.621, "eval_samples_per_second": 20.8, "eval_steps_per_second": 2.602, "eval_wer": 0.2965066298486467, "step": 11200 }, { "epoch": 58.0, "learning_rate": 5.278723404255319e-05, "loss": 0.04, "step": 11600 }, { "epoch": 58.0, "eval_loss": 0.49005138874053955, "eval_runtime": 254.1559, "eval_samples_per_second": 20.511, "eval_steps_per_second": 2.565, "eval_wer": 0.2988511735743872, "step": 11600 }, { "epoch": 60.0, "learning_rate": 5.108510638297873e-05, "loss": 0.0375, "step": 12000 }, { "epoch": 60.0, "eval_loss": 0.46530571579933167, "eval_runtime": 255.1439, "eval_samples_per_second": 20.432, "eval_steps_per_second": 2.555, "eval_wer": 0.2963763774194389, "step": 12000 }, { "epoch": 62.0, "learning_rate": 4.938297872340426e-05, "loss": 0.0353, "step": 12400 }, { "epoch": 62.0, "eval_loss": 0.5175049901008606, "eval_runtime": 253.9576, "eval_samples_per_second": 20.527, "eval_steps_per_second": 2.567, "eval_wer": 0.2963242764477558, "step": 12400 }, { "epoch": 64.0, "learning_rate": 4.768085106382979e-05, "loss": 0.0346, "step": 12800 }, { "epoch": 64.0, "eval_loss": 0.5116600394248962, "eval_runtime": 256.8702, "eval_samples_per_second": 20.294, "eval_steps_per_second": 2.538, "eval_wer": 0.2914528355953839, "step": 12800 }, { "epoch": 66.0, "learning_rate": 4.597872340425532e-05, "loss": 0.0341, "step": 13200 }, { "epoch": 66.0, "eval_loss": 0.4671209454536438, "eval_runtime": 255.8006, "eval_samples_per_second": 20.379, "eval_steps_per_second": 2.549, "eval_wer": 0.2945528434105296, "step": 13200 }, { "epoch": 68.0, "learning_rate": 4.4280851063829785e-05, "loss": 0.0332, "step": 13600 }, { "epoch": 68.0, "eval_loss": 0.48402974009513855, "eval_runtime": 251.9224, "eval_samples_per_second": 20.693, "eval_steps_per_second": 2.588, "eval_wer": 0.2882746763227134, "step": 13600 }, { "epoch": 70.0, "learning_rate": 4.257872340425532e-05, "loss": 0.0307, "step": 14000 }, { "epoch": 70.0, "eval_loss": 0.4735446870326996, "eval_runtime": 251.6191, "eval_samples_per_second": 20.718, "eval_steps_per_second": 2.591, "eval_wer": 0.2853570219084586, "step": 14000 }, { "epoch": 72.0, "learning_rate": 4.0876595744680854e-05, "loss": 0.0296, "step": 14400 }, { "epoch": 72.0, "eval_loss": 0.49666687846183777, "eval_runtime": 255.1334, "eval_samples_per_second": 20.432, "eval_steps_per_second": 2.556, "eval_wer": 0.2870242530023185, "step": 14400 }, { "epoch": 74.0, "learning_rate": 3.917446808510639e-05, "loss": 0.0288, "step": 14800 }, { "epoch": 74.0, "eval_loss": 0.4831916093826294, "eval_runtime": 250.196, "eval_samples_per_second": 20.836, "eval_steps_per_second": 2.606, "eval_wer": 0.28111079271628414, "step": 14800 }, { "epoch": 76.0, "learning_rate": 3.747234042553192e-05, "loss": 0.0278, "step": 15200 }, { "epoch": 76.0, "eval_loss": 0.5073911547660828, "eval_runtime": 251.6593, "eval_samples_per_second": 20.715, "eval_steps_per_second": 2.591, "eval_wer": 0.2897595540156824, "step": 15200 }, { "epoch": 78.0, "learning_rate": 3.577021276595745e-05, "loss": 0.0264, "step": 15600 }, { "epoch": 78.0, "eval_loss": 0.49896690249443054, "eval_runtime": 253.2919, "eval_samples_per_second": 20.581, "eval_steps_per_second": 2.574, "eval_wer": 0.28066793445697763, "step": 15600 }, { "epoch": 80.0, "learning_rate": 3.406808510638298e-05, "loss": 0.0259, "step": 16000 }, { "epoch": 80.0, "eval_loss": 0.4933984577655792, "eval_runtime": 250.843, "eval_samples_per_second": 20.782, "eval_steps_per_second": 2.599, "eval_wer": 0.28478391121994423, "step": 16000 }, { "epoch": 82.0, "learning_rate": 3.2365957446808515e-05, "loss": 0.0256, "step": 16400 }, { "epoch": 82.0, "eval_loss": 0.5108290314674377, "eval_runtime": 254.0809, "eval_samples_per_second": 20.517, "eval_steps_per_second": 2.566, "eval_wer": 0.2834032354703415, "step": 16400 }, { "epoch": 84.0, "learning_rate": 3.066382978723404e-05, "loss": 0.0241, "step": 16800 }, { "epoch": 84.0, "eval_loss": 0.5010423064231873, "eval_runtime": 253.2137, "eval_samples_per_second": 20.587, "eval_steps_per_second": 2.575, "eval_wer": 0.28137129757469975, "step": 16800 }, { "epoch": 86.0, "learning_rate": 2.8961702127659574e-05, "loss": 0.0225, "step": 17200 }, { "epoch": 86.0, "eval_loss": 0.5098214745521545, "eval_runtime": 251.7862, "eval_samples_per_second": 20.704, "eval_steps_per_second": 2.589, "eval_wer": 0.2809544898012348, "step": 17200 }, { "epoch": 88.0, "learning_rate": 2.725957446808511e-05, "loss": 0.0214, "step": 17600 }, { "epoch": 88.0, "eval_loss": 0.5001631379127502, "eval_runtime": 251.7107, "eval_samples_per_second": 20.71, "eval_steps_per_second": 2.59, "eval_wer": 0.27485867611430953, "step": 17600 }, { "epoch": 90.0, "learning_rate": 2.5565957446808516e-05, "loss": 0.0212, "step": 18000 }, { "epoch": 90.0, "eval_loss": 0.5039480328559875, "eval_runtime": 256.5398, "eval_samples_per_second": 20.32, "eval_steps_per_second": 2.542, "eval_wer": 0.27517128194440826, "step": 18000 }, { "epoch": 92.0, "learning_rate": 2.3863829787234044e-05, "loss": 0.0198, "step": 18400 }, { "epoch": 92.0, "eval_loss": 0.5044199824333191, "eval_runtime": 250.6127, "eval_samples_per_second": 20.801, "eval_steps_per_second": 2.602, "eval_wer": 0.2775939771276734, "step": 18400 }, { "epoch": 94.0, "learning_rate": 2.2161702127659575e-05, "loss": 0.0194, "step": 18800 }, { "epoch": 94.0, "eval_loss": 0.5091202855110168, "eval_runtime": 252.5846, "eval_samples_per_second": 20.639, "eval_steps_per_second": 2.581, "eval_wer": 0.2784796936462865, "step": 18800 }, { "epoch": 96.0, "learning_rate": 2.0459574468085106e-05, "loss": 0.0193, "step": 19200 }, { "epoch": 96.0, "eval_loss": 0.5078316926956177, "eval_runtime": 254.2064, "eval_samples_per_second": 20.507, "eval_steps_per_second": 2.565, "eval_wer": 0.2763435538072785, "step": 19200 }, { "epoch": 98.0, "learning_rate": 1.875744680851064e-05, "loss": 0.0189, "step": 19600 }, { "epoch": 98.0, "eval_loss": 0.50649094581604, "eval_runtime": 250.3726, "eval_samples_per_second": 20.821, "eval_steps_per_second": 2.604, "eval_wer": 0.2715502644124313, "step": 19600 }, { "epoch": 100.0, "learning_rate": 1.7055319148936173e-05, "loss": 0.0169, "step": 20000 }, { "epoch": 100.0, "eval_loss": 0.5111807584762573, "eval_runtime": 251.2786, "eval_samples_per_second": 20.746, "eval_steps_per_second": 2.595, "eval_wer": 0.2712897595540157, "step": 20000 }, { "epoch": 102.0, "learning_rate": 1.5353191489361704e-05, "loss": 0.0162, "step": 20400 }, { "epoch": 102.0, "eval_loss": 0.5116817951202393, "eval_runtime": 249.9762, "eval_samples_per_second": 20.854, "eval_steps_per_second": 2.608, "eval_wer": 0.27011748769114546, "step": 20400 }, { "epoch": 104.0, "learning_rate": 1.3651063829787234e-05, "loss": 0.0155, "step": 20800 }, { "epoch": 104.0, "eval_loss": 0.5181553363800049, "eval_runtime": 252.5167, "eval_samples_per_second": 20.644, "eval_steps_per_second": 2.582, "eval_wer": 0.26858050902649333, "step": 20800 }, { "epoch": 106.0, "learning_rate": 1.1948936170212767e-05, "loss": 0.0162, "step": 21200 }, { "epoch": 106.0, "eval_loss": 0.5102177262306213, "eval_runtime": 253.88, "eval_samples_per_second": 20.533, "eval_steps_per_second": 2.568, "eval_wer": 0.26886706437075053, "step": 21200 }, { "epoch": 108.0, "learning_rate": 1.0246808510638298e-05, "loss": 0.015, "step": 21600 }, { "epoch": 108.0, "eval_loss": 0.5183060169219971, "eval_runtime": 256.9895, "eval_samples_per_second": 20.285, "eval_steps_per_second": 2.537, "eval_wer": 0.27050824497876885, "step": 21600 }, { "epoch": 110.0, "learning_rate": 8.54468085106383e-06, "loss": 0.0148, "step": 22000 }, { "epoch": 110.0, "eval_loss": 0.524269700050354, "eval_runtime": 251.4198, "eval_samples_per_second": 20.734, "eval_steps_per_second": 2.593, "eval_wer": 0.269075468257483, "step": 22000 }, { "epoch": 112.0, "learning_rate": 6.846808510638299e-06, "loss": 0.0142, "step": 22400 }, { "epoch": 112.0, "eval_loss": 0.5115846991539001, "eval_runtime": 252.9673, "eval_samples_per_second": 20.607, "eval_steps_per_second": 2.577, "eval_wer": 0.269075468257483, "step": 22400 }, { "epoch": 114.0, "learning_rate": 5.14468085106383e-06, "loss": 0.0133, "step": 22800 }, { "epoch": 114.0, "eval_loss": 0.5274414420127869, "eval_runtime": 253.5713, "eval_samples_per_second": 20.558, "eval_steps_per_second": 2.571, "eval_wer": 0.2676166410503556, "step": 22800 }, { "epoch": 116.0, "learning_rate": 3.4425531914893614e-06, "loss": 0.0138, "step": 23200 }, { "epoch": 116.0, "eval_loss": 0.5216977596282959, "eval_runtime": 253.8491, "eval_samples_per_second": 20.536, "eval_steps_per_second": 2.568, "eval_wer": 0.26722588376273215, "step": 23200 }, { "epoch": 118.0, "learning_rate": 1.7404255319148937e-06, "loss": 0.0135, "step": 23600 }, { "epoch": 118.0, "eval_loss": 0.5230008363723755, "eval_runtime": 257.6327, "eval_samples_per_second": 20.234, "eval_steps_per_second": 2.531, "eval_wer": 0.2673300857060984, "step": 23600 }, { "epoch": 120.0, "learning_rate": 3.8297872340425535e-08, "loss": 0.0134, "step": 24000 }, { "epoch": 120.0, "eval_loss": 0.5197370648384094, "eval_runtime": 253.9198, "eval_samples_per_second": 20.53, "eval_steps_per_second": 2.568, "eval_wer": 0.2666527730742178, "step": 24000 } ], "max_steps": 24000, "num_train_epochs": 120, "total_flos": 1.951590305315055e+20, "trial_name": null, "trial_params": null }