{ "best_metric": 41.11150694691842, "best_model_checkpoint": "whisper_large_v2_adam_8bit/checkpoint-485", "epoch": 17.962962962962962, "global_step": 485, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 8.000000000000001e-07, "loss": 1.0874, "step": 5 }, { "epoch": 0.19, "eval_loss": 1.0718352794647217, "eval_runtime": 305.4443, "eval_samples_per_second": 0.262, "eval_steps_per_second": 0.131, "eval_wer": 54.82721767011044, "step": 5 }, { "epoch": 0.37, "learning_rate": 1.8000000000000001e-06, "loss": 1.0169, "step": 10 }, { "epoch": 0.37, "eval_loss": 0.895382285118103, "eval_runtime": 335.7721, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.119, "eval_wer": 57.14285714285714, "step": 10 }, { "epoch": 0.56, "learning_rate": 2.6e-06, "loss": 0.8095, "step": 15 }, { "epoch": 0.56, "eval_loss": 0.7291234135627747, "eval_runtime": 341.1764, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.117, "eval_wer": 59.707873174207336, "step": 15 }, { "epoch": 0.74, "learning_rate": 3.6000000000000003e-06, "loss": 0.6586, "step": 20 }, { "epoch": 0.74, "eval_loss": 0.5784670114517212, "eval_runtime": 208.1203, "eval_samples_per_second": 0.384, "eval_steps_per_second": 0.192, "eval_wer": 83.07801923762024, "step": 20 }, { "epoch": 0.93, "learning_rate": 4.600000000000001e-06, "loss": 0.5849, "step": 25 }, { "epoch": 0.93, "eval_loss": 0.5224391222000122, "eval_runtime": 251.382, "eval_samples_per_second": 0.318, "eval_steps_per_second": 0.159, "eval_wer": 91.73494834342715, "step": 25 }, { "epoch": 1.11, "learning_rate": 5.600000000000001e-06, "loss": 0.4623, "step": 30 }, { "epoch": 1.11, "eval_loss": 0.48158684372901917, "eval_runtime": 279.2357, "eval_samples_per_second": 0.286, "eval_steps_per_second": 0.143, "eval_wer": 72.99608122550765, "step": 30 }, { "epoch": 1.3, "learning_rate": 6.600000000000001e-06, "loss": 0.4459, "step": 35 }, { "epoch": 1.3, "eval_loss": 0.4286937713623047, "eval_runtime": 232.3372, "eval_samples_per_second": 0.344, "eval_steps_per_second": 0.172, "eval_wer": 76.0242251514072, "step": 35 }, { "epoch": 1.48, "learning_rate": 7.600000000000001e-06, "loss": 0.3496, "step": 40 }, { "epoch": 1.48, "eval_loss": 0.4074183404445648, "eval_runtime": 289.554, "eval_samples_per_second": 0.276, "eval_steps_per_second": 0.138, "eval_wer": 48.6284289276808, "step": 40 }, { "epoch": 1.67, "learning_rate": 8.6e-06, "loss": 0.3964, "step": 45 }, { "epoch": 1.67, "eval_loss": 0.3671049177646637, "eval_runtime": 356.7998, "eval_samples_per_second": 0.224, "eval_steps_per_second": 0.112, "eval_wer": 57.67723548272177, "step": 45 }, { "epoch": 1.85, "learning_rate": 9.600000000000001e-06, "loss": 0.3506, "step": 50 }, { "epoch": 1.85, "eval_loss": 0.37803885340690613, "eval_runtime": 338.1245, "eval_samples_per_second": 0.237, "eval_steps_per_second": 0.118, "eval_wer": 49.76843605272533, "step": 50 }, { "epoch": 2.04, "learning_rate": 9.96842105263158e-06, "loss": 0.377, "step": 55 }, { "epoch": 2.04, "eval_loss": 0.35833048820495605, "eval_runtime": 344.9289, "eval_samples_per_second": 0.232, "eval_steps_per_second": 0.116, "eval_wer": 56.21660135375846, "step": 55 }, { "epoch": 2.22, "learning_rate": 9.915789473684211e-06, "loss": 0.2766, "step": 60 }, { "epoch": 2.22, "eval_loss": 0.37928158044815063, "eval_runtime": 350.8056, "eval_samples_per_second": 0.228, "eval_steps_per_second": 0.114, "eval_wer": 83.0423940149626, "step": 60 }, { "epoch": 2.41, "learning_rate": 9.863157894736843e-06, "loss": 0.2522, "step": 65 }, { "epoch": 2.41, "eval_loss": 0.3648552894592285, "eval_runtime": 343.605, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.116, "eval_wer": 69.89668685429284, "step": 65 }, { "epoch": 2.59, "learning_rate": 9.810526315789475e-06, "loss": 0.2793, "step": 70 }, { "epoch": 2.59, "eval_loss": 0.36223381757736206, "eval_runtime": 325.8454, "eval_samples_per_second": 0.246, "eval_steps_per_second": 0.123, "eval_wer": 48.37905236907731, "step": 70 }, { "epoch": 2.78, "learning_rate": 9.757894736842106e-06, "loss": 0.27, "step": 75 }, { "epoch": 2.78, "eval_loss": 0.38781291246414185, "eval_runtime": 331.2847, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.121, "eval_wer": 48.09405058781617, "step": 75 }, { "epoch": 2.96, "learning_rate": 9.715789473684212e-06, "loss": 0.2624, "step": 80 }, { "epoch": 2.96, "eval_loss": 0.38262635469436646, "eval_runtime": 329.4177, "eval_samples_per_second": 0.243, "eval_steps_per_second": 0.121, "eval_wer": 47.167794798717495, "step": 80 }, { "epoch": 3.15, "learning_rate": 9.663157894736843e-06, "loss": 0.225, "step": 85 }, { "epoch": 3.15, "eval_loss": 0.38125258684158325, "eval_runtime": 310.0949, "eval_samples_per_second": 0.258, "eval_steps_per_second": 0.129, "eval_wer": 54.39971499821874, "step": 85 }, { "epoch": 3.33, "learning_rate": 9.610526315789475e-06, "loss": 0.2062, "step": 90 }, { "epoch": 3.33, "eval_loss": 0.3962409198284149, "eval_runtime": 299.3235, "eval_samples_per_second": 0.267, "eval_steps_per_second": 0.134, "eval_wer": 72.92483078019238, "step": 90 }, { "epoch": 3.52, "learning_rate": 9.557894736842107e-06, "loss": 0.192, "step": 95 }, { "epoch": 3.52, "eval_loss": 0.3863430619239807, "eval_runtime": 332.6447, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 59.17349483434271, "step": 95 }, { "epoch": 3.7, "learning_rate": 9.505263157894738e-06, "loss": 0.224, "step": 100 }, { "epoch": 3.7, "eval_loss": 0.38708847761154175, "eval_runtime": 330.6389, "eval_samples_per_second": 0.242, "eval_steps_per_second": 0.121, "eval_wer": 54.39971499821874, "step": 100 }, { "epoch": 3.89, "learning_rate": 9.452631578947368e-06, "loss": 0.2127, "step": 105 }, { "epoch": 3.89, "eval_loss": 0.41648903489112854, "eval_runtime": 357.2142, "eval_samples_per_second": 0.224, "eval_steps_per_second": 0.112, "eval_wer": 70.07481296758104, "step": 105 }, { "epoch": 4.07, "learning_rate": 9.4e-06, "loss": 0.2033, "step": 110 }, { "epoch": 4.07, "eval_loss": 0.39047971367836, "eval_runtime": 316.4665, "eval_samples_per_second": 0.253, "eval_steps_per_second": 0.126, "eval_wer": 50.69469184182401, "step": 110 }, { "epoch": 4.26, "learning_rate": 9.347368421052633e-06, "loss": 0.1426, "step": 115 }, { "epoch": 4.26, "eval_loss": 0.41405850648880005, "eval_runtime": 326.6943, "eval_samples_per_second": 0.245, "eval_steps_per_second": 0.122, "eval_wer": 44.53152832205201, "step": 115 }, { "epoch": 4.44, "learning_rate": 9.294736842105265e-06, "loss": 0.1737, "step": 120 }, { "epoch": 4.44, "eval_loss": 0.4162759780883789, "eval_runtime": 334.7907, "eval_samples_per_second": 0.239, "eval_steps_per_second": 0.119, "eval_wer": 47.52404702529391, "step": 120 }, { "epoch": 4.63, "learning_rate": 9.242105263157896e-06, "loss": 0.1404, "step": 125 }, { "epoch": 4.63, "eval_loss": 0.4179295599460602, "eval_runtime": 313.7669, "eval_samples_per_second": 0.255, "eval_steps_per_second": 0.127, "eval_wer": 47.951549697185605, "step": 125 }, { "epoch": 4.81, "learning_rate": 9.189473684210526e-06, "loss": 0.1738, "step": 130 }, { "epoch": 4.81, "eval_loss": 0.4070858061313629, "eval_runtime": 322.5875, "eval_samples_per_second": 0.248, "eval_steps_per_second": 0.124, "eval_wer": 43.747773423583894, "step": 130 }, { "epoch": 5.0, "learning_rate": 9.136842105263158e-06, "loss": 0.1852, "step": 135 }, { "epoch": 5.0, "eval_loss": 0.3889687657356262, "eval_runtime": 331.8197, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.121, "eval_wer": 65.94228713929462, "step": 135 }, { "epoch": 5.19, "learning_rate": 9.08421052631579e-06, "loss": 0.1111, "step": 140 }, { "epoch": 5.19, "eval_loss": 0.4467521607875824, "eval_runtime": 340.6568, "eval_samples_per_second": 0.235, "eval_steps_per_second": 0.117, "eval_wer": 47.5952974706092, "step": 140 }, { "epoch": 5.37, "learning_rate": 9.031578947368423e-06, "loss": 0.1642, "step": 145 }, { "epoch": 5.37, "eval_loss": 0.47300252318382263, "eval_runtime": 330.5579, "eval_samples_per_second": 0.242, "eval_steps_per_second": 0.121, "eval_wer": 53.152832205201285, "step": 145 }, { "epoch": 5.56, "learning_rate": 8.978947368421055e-06, "loss": 0.1552, "step": 150 }, { "epoch": 5.56, "eval_loss": 0.44687384366989136, "eval_runtime": 317.2285, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.126, "eval_wer": 65.62166013537585, "step": 150 }, { "epoch": 5.74, "learning_rate": 8.926315789473685e-06, "loss": 0.1497, "step": 155 }, { "epoch": 5.74, "eval_loss": 0.44148778915405273, "eval_runtime": 315.1611, "eval_samples_per_second": 0.254, "eval_steps_per_second": 0.127, "eval_wer": 48.05842536515853, "step": 155 }, { "epoch": 5.93, "learning_rate": 8.873684210526316e-06, "loss": 0.1419, "step": 160 }, { "epoch": 5.93, "eval_loss": 0.4392489492893219, "eval_runtime": 336.253, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.119, "eval_wer": 43.9258995368721, "step": 160 }, { "epoch": 6.11, "learning_rate": 8.821052631578948e-06, "loss": 0.1176, "step": 165 }, { "epoch": 6.11, "eval_loss": 0.44891494512557983, "eval_runtime": 328.7909, "eval_samples_per_second": 0.243, "eval_steps_per_second": 0.122, "eval_wer": 41.1827573922337, "step": 165 }, { "epoch": 6.3, "learning_rate": 8.76842105263158e-06, "loss": 0.0996, "step": 170 }, { "epoch": 6.3, "eval_loss": 0.505344569683075, "eval_runtime": 338.587, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.118, "eval_wer": 46.81154257214107, "step": 170 }, { "epoch": 6.48, "learning_rate": 8.715789473684211e-06, "loss": 0.1376, "step": 175 }, { "epoch": 6.48, "eval_loss": 0.48324888944625854, "eval_runtime": 326.4874, "eval_samples_per_second": 0.245, "eval_steps_per_second": 0.123, "eval_wer": 52.29782686141788, "step": 175 }, { "epoch": 6.67, "learning_rate": 8.663157894736843e-06, "loss": 0.1093, "step": 180 }, { "epoch": 6.67, "eval_loss": 0.4419935643672943, "eval_runtime": 340.888, "eval_samples_per_second": 0.235, "eval_steps_per_second": 0.117, "eval_wer": 54.39971499821874, "step": 180 }, { "epoch": 6.85, "learning_rate": 8.610526315789474e-06, "loss": 0.1325, "step": 185 }, { "epoch": 6.85, "eval_loss": 0.4463180899620056, "eval_runtime": 333.0453, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 45.92091200570004, "step": 185 }, { "epoch": 7.04, "learning_rate": 8.557894736842106e-06, "loss": 0.1151, "step": 190 }, { "epoch": 7.04, "eval_loss": 0.4659479558467865, "eval_runtime": 321.4982, "eval_samples_per_second": 0.249, "eval_steps_per_second": 0.124, "eval_wer": 46.09903811898825, "step": 190 }, { "epoch": 7.22, "learning_rate": 8.505263157894738e-06, "loss": 0.0892, "step": 195 }, { "epoch": 7.22, "eval_loss": 0.4642786979675293, "eval_runtime": 304.1157, "eval_samples_per_second": 0.263, "eval_steps_per_second": 0.132, "eval_wer": 61.9878874242964, "step": 195 }, { "epoch": 7.41, "learning_rate": 8.45263157894737e-06, "loss": 0.0819, "step": 200 }, { "epoch": 7.41, "eval_loss": 0.4606548845767975, "eval_runtime": 314.9466, "eval_samples_per_second": 0.254, "eval_steps_per_second": 0.127, "eval_wer": 49.162807267545425, "step": 200 }, { "epoch": 7.59, "learning_rate": 8.400000000000001e-06, "loss": 0.0881, "step": 205 }, { "epoch": 7.59, "eval_loss": 0.49518561363220215, "eval_runtime": 333.9165, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 47.167794798717495, "step": 205 }, { "epoch": 7.78, "learning_rate": 8.347368421052633e-06, "loss": 0.0902, "step": 210 }, { "epoch": 7.78, "eval_loss": 0.48233914375305176, "eval_runtime": 323.2853, "eval_samples_per_second": 0.247, "eval_steps_per_second": 0.124, "eval_wer": 47.98717491984325, "step": 210 }, { "epoch": 7.96, "learning_rate": 8.294736842105264e-06, "loss": 0.1125, "step": 215 }, { "epoch": 7.96, "eval_loss": 0.48526009917259216, "eval_runtime": 331.099, "eval_samples_per_second": 0.242, "eval_steps_per_second": 0.121, "eval_wer": 59.45849661560385, "step": 215 }, { "epoch": 8.15, "learning_rate": 8.242105263157896e-06, "loss": 0.0668, "step": 220 }, { "epoch": 8.15, "eval_loss": 0.5275024175643921, "eval_runtime": 310.2348, "eval_samples_per_second": 0.258, "eval_steps_per_second": 0.129, "eval_wer": 58.42536515853224, "step": 220 }, { "epoch": 8.33, "learning_rate": 8.189473684210527e-06, "loss": 0.0723, "step": 225 }, { "epoch": 8.33, "eval_loss": 0.532504141330719, "eval_runtime": 331.9501, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.121, "eval_wer": 46.775917349483436, "step": 225 }, { "epoch": 8.52, "learning_rate": 8.136842105263159e-06, "loss": 0.0825, "step": 230 }, { "epoch": 8.52, "eval_loss": 0.5000892877578735, "eval_runtime": 328.9744, "eval_samples_per_second": 0.243, "eval_steps_per_second": 0.122, "eval_wer": 44.21090131813324, "step": 230 }, { "epoch": 8.7, "learning_rate": 8.08421052631579e-06, "loss": 0.0859, "step": 235 }, { "epoch": 8.7, "eval_loss": 0.5018569827079773, "eval_runtime": 321.0271, "eval_samples_per_second": 0.249, "eval_steps_per_second": 0.125, "eval_wer": 55.68222301389384, "step": 235 }, { "epoch": 8.89, "learning_rate": 8.03157894736842e-06, "loss": 0.0838, "step": 240 }, { "epoch": 8.89, "eval_loss": 0.5195188522338867, "eval_runtime": 326.5564, "eval_samples_per_second": 0.245, "eval_steps_per_second": 0.122, "eval_wer": 50.65906661916637, "step": 240 }, { "epoch": 9.07, "learning_rate": 7.978947368421052e-06, "loss": 0.0634, "step": 245 }, { "epoch": 9.07, "eval_loss": 0.5121615529060364, "eval_runtime": 333.3682, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 45.42215888849305, "step": 245 }, { "epoch": 9.26, "learning_rate": 7.926315789473686e-06, "loss": 0.0611, "step": 250 }, { "epoch": 9.26, "eval_loss": 0.53472900390625, "eval_runtime": 334.7977, "eval_samples_per_second": 0.239, "eval_steps_per_second": 0.119, "eval_wer": 45.38653366583541, "step": 250 }, { "epoch": 9.44, "learning_rate": 7.873684210526317e-06, "loss": 0.0742, "step": 255 }, { "epoch": 9.44, "eval_loss": 0.5380967855453491, "eval_runtime": 341.6365, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.117, "eval_wer": 46.170288564303526, "step": 255 }, { "epoch": 9.63, "learning_rate": 7.821052631578949e-06, "loss": 0.0544, "step": 260 }, { "epoch": 9.63, "eval_loss": 0.5325397253036499, "eval_runtime": 344.44, "eval_samples_per_second": 0.232, "eval_steps_per_second": 0.116, "eval_wer": 52.08407552547203, "step": 260 }, { "epoch": 9.81, "learning_rate": 7.768421052631579e-06, "loss": 0.0832, "step": 265 }, { "epoch": 9.81, "eval_loss": 0.5298484563827515, "eval_runtime": 338.7288, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.118, "eval_wer": 46.81154257214107, "step": 265 }, { "epoch": 10.0, "learning_rate": 7.71578947368421e-06, "loss": 0.079, "step": 270 }, { "epoch": 10.0, "eval_loss": 0.5149779319763184, "eval_runtime": 322.201, "eval_samples_per_second": 0.248, "eval_steps_per_second": 0.124, "eval_wer": 49.447809048806555, "step": 270 }, { "epoch": 10.19, "learning_rate": 7.663157894736842e-06, "loss": 0.0692, "step": 275 }, { "epoch": 10.19, "eval_loss": 0.5358998775482178, "eval_runtime": 304.2081, "eval_samples_per_second": 0.263, "eval_steps_per_second": 0.131, "eval_wer": 50.3384396152476, "step": 275 }, { "epoch": 10.37, "learning_rate": 7.610526315789474e-06, "loss": 0.0549, "step": 280 }, { "epoch": 10.37, "eval_loss": 0.5207683444023132, "eval_runtime": 333.629, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 49.12718204488778, "step": 280 }, { "epoch": 10.56, "learning_rate": 7.557894736842106e-06, "loss": 0.0533, "step": 285 }, { "epoch": 10.56, "eval_loss": 0.528590202331543, "eval_runtime": 332.7917, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 47.02529390808692, "step": 285 }, { "epoch": 10.74, "learning_rate": 7.505263157894738e-06, "loss": 0.0821, "step": 290 }, { "epoch": 10.74, "eval_loss": 0.51960289478302, "eval_runtime": 332.8933, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 46.24153900961881, "step": 290 }, { "epoch": 10.93, "learning_rate": 7.4526315789473695e-06, "loss": 0.0672, "step": 295 }, { "epoch": 10.93, "eval_loss": 0.5139871835708618, "eval_runtime": 342.8201, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.117, "eval_wer": 46.45529034556466, "step": 295 }, { "epoch": 11.11, "learning_rate": 7.4e-06, "loss": 0.0694, "step": 300 }, { "epoch": 11.11, "eval_loss": 0.5785766243934631, "eval_runtime": 334.4012, "eval_samples_per_second": 0.239, "eval_steps_per_second": 0.12, "eval_wer": 44.92340577128607, "step": 300 }, { "epoch": 11.3, "learning_rate": 7.347368421052632e-06, "loss": 0.09, "step": 305 }, { "epoch": 11.3, "eval_loss": 0.584839940071106, "eval_runtime": 343.2341, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.117, "eval_wer": 49.59030993943712, "step": 305 }, { "epoch": 11.48, "learning_rate": 7.2947368421052636e-06, "loss": 0.0824, "step": 310 }, { "epoch": 11.48, "eval_loss": 0.5461788773536682, "eval_runtime": 330.7912, "eval_samples_per_second": 0.242, "eval_steps_per_second": 0.121, "eval_wer": 45.24403277520484, "step": 310 }, { "epoch": 11.67, "learning_rate": 7.242105263157896e-06, "loss": 0.0805, "step": 315 }, { "epoch": 11.67, "eval_loss": 0.537391185760498, "eval_runtime": 352.6332, "eval_samples_per_second": 0.227, "eval_steps_per_second": 0.113, "eval_wer": 54.25721410758817, "step": 315 }, { "epoch": 11.85, "learning_rate": 7.189473684210527e-06, "loss": 0.0688, "step": 320 }, { "epoch": 11.85, "eval_loss": 0.5511082410812378, "eval_runtime": 336.5436, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.119, "eval_wer": 42.8927680798005, "step": 320 }, { "epoch": 12.04, "learning_rate": 7.1368421052631585e-06, "loss": 0.048, "step": 325 }, { "epoch": 12.04, "eval_loss": 0.5636075139045715, "eval_runtime": 319.3896, "eval_samples_per_second": 0.25, "eval_steps_per_second": 0.125, "eval_wer": 44.81653010331315, "step": 325 }, { "epoch": 12.22, "learning_rate": 7.08421052631579e-06, "loss": 0.054, "step": 330 }, { "epoch": 12.22, "eval_loss": 0.5698742866516113, "eval_runtime": 345.1573, "eval_samples_per_second": 0.232, "eval_steps_per_second": 0.116, "eval_wer": 46.88279301745636, "step": 330 }, { "epoch": 12.41, "learning_rate": 7.031578947368422e-06, "loss": 0.0535, "step": 335 }, { "epoch": 12.41, "eval_loss": 0.5786362886428833, "eval_runtime": 331.7186, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.121, "eval_wer": 42.60776629853937, "step": 335 }, { "epoch": 12.59, "learning_rate": 6.9789473684210525e-06, "loss": 0.0644, "step": 340 }, { "epoch": 12.59, "eval_loss": 0.5574811697006226, "eval_runtime": 339.8046, "eval_samples_per_second": 0.235, "eval_steps_per_second": 0.118, "eval_wer": 45.956537228357675, "step": 340 }, { "epoch": 12.78, "learning_rate": 6.926315789473684e-06, "loss": 0.0618, "step": 345 }, { "epoch": 12.78, "eval_loss": 0.5477833151817322, "eval_runtime": 337.626, "eval_samples_per_second": 0.237, "eval_steps_per_second": 0.118, "eval_wer": 46.918418240114, "step": 345 }, { "epoch": 12.96, "learning_rate": 6.873684210526317e-06, "loss": 0.0832, "step": 350 }, { "epoch": 12.96, "eval_loss": 0.6038811802864075, "eval_runtime": 336.2086, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.119, "eval_wer": 44.068400427502674, "step": 350 }, { "epoch": 13.15, "learning_rate": 6.821052631578948e-06, "loss": 0.0592, "step": 355 }, { "epoch": 13.15, "eval_loss": 0.6127667427062988, "eval_runtime": 331.5309, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.121, "eval_wer": 42.32276451727823, "step": 355 }, { "epoch": 13.33, "learning_rate": 6.76842105263158e-06, "loss": 0.0619, "step": 360 }, { "epoch": 13.33, "eval_loss": 0.591279149055481, "eval_runtime": 332.3235, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.12, "eval_wer": 45.03028143925899, "step": 360 }, { "epoch": 13.52, "learning_rate": 6.715789473684211e-06, "loss": 0.0998, "step": 365 }, { "epoch": 13.52, "eval_loss": 0.5318233966827393, "eval_runtime": 332.2782, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.12, "eval_wer": 42.75026718916993, "step": 365 }, { "epoch": 13.7, "learning_rate": 6.663157894736842e-06, "loss": 0.05, "step": 370 }, { "epoch": 13.7, "eval_loss": 0.5774287581443787, "eval_runtime": 329.9705, "eval_samples_per_second": 0.242, "eval_steps_per_second": 0.121, "eval_wer": 42.82151763448522, "step": 370 }, { "epoch": 13.89, "learning_rate": 6.610526315789474e-06, "loss": 0.1014, "step": 375 }, { "epoch": 13.89, "eval_loss": 0.5849052667617798, "eval_runtime": 328.3008, "eval_samples_per_second": 0.244, "eval_steps_per_second": 0.122, "eval_wer": 48.05842536515853, "step": 375 }, { "epoch": 14.07, "learning_rate": 6.557894736842106e-06, "loss": 0.0494, "step": 380 }, { "epoch": 14.07, "eval_loss": 0.5518860816955566, "eval_runtime": 318.5974, "eval_samples_per_second": 0.251, "eval_steps_per_second": 0.126, "eval_wer": 48.69967937299608, "step": 380 }, { "epoch": 14.26, "learning_rate": 6.505263157894738e-06, "loss": 0.0456, "step": 385 }, { "epoch": 14.26, "eval_loss": 0.5957409739494324, "eval_runtime": 335.9574, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.119, "eval_wer": 42.75026718916993, "step": 385 }, { "epoch": 14.44, "learning_rate": 6.452631578947369e-06, "loss": 0.0957, "step": 390 }, { "epoch": 14.44, "eval_loss": 0.5850934982299805, "eval_runtime": 347.6573, "eval_samples_per_second": 0.23, "eval_steps_per_second": 0.115, "eval_wer": 48.80655504096901, "step": 390 }, { "epoch": 14.63, "learning_rate": 6.4000000000000006e-06, "loss": 0.0514, "step": 395 }, { "epoch": 14.63, "eval_loss": 0.6157152652740479, "eval_runtime": 332.3513, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.12, "eval_wer": 49.661560384752406, "step": 395 }, { "epoch": 14.81, "learning_rate": 6.347368421052632e-06, "loss": 0.1045, "step": 400 }, { "epoch": 14.81, "eval_loss": 0.5892783999443054, "eval_runtime": 341.0177, "eval_samples_per_second": 0.235, "eval_steps_per_second": 0.117, "eval_wer": 52.6540790879943, "step": 400 }, { "epoch": 15.0, "learning_rate": 6.294736842105264e-06, "loss": 0.0485, "step": 405 }, { "epoch": 15.0, "eval_loss": 0.5818936228752136, "eval_runtime": 334.6692, "eval_samples_per_second": 0.239, "eval_steps_per_second": 0.12, "eval_wer": 50.9440684004275, "step": 405 }, { "epoch": 15.19, "learning_rate": 6.242105263157895e-06, "loss": 0.0705, "step": 410 }, { "epoch": 15.19, "eval_loss": 0.5869933366775513, "eval_runtime": 338.9314, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.118, "eval_wer": 45.77841111506947, "step": 410 }, { "epoch": 15.37, "learning_rate": 6.189473684210526e-06, "loss": 0.0354, "step": 415 }, { "epoch": 15.37, "eval_loss": 0.620836615562439, "eval_runtime": 340.9346, "eval_samples_per_second": 0.235, "eval_steps_per_second": 0.117, "eval_wer": 47.20342002137513, "step": 415 }, { "epoch": 15.56, "learning_rate": 6.136842105263159e-06, "loss": 0.0366, "step": 420 }, { "epoch": 15.56, "eval_loss": 0.6018834114074707, "eval_runtime": 339.0784, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.118, "eval_wer": 46.02778767367296, "step": 420 }, { "epoch": 15.74, "learning_rate": 6.08421052631579e-06, "loss": 0.0422, "step": 425 }, { "epoch": 15.74, "eval_loss": 0.5831278562545776, "eval_runtime": 332.3516, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.12, "eval_wer": 44.92340577128607, "step": 425 }, { "epoch": 15.93, "learning_rate": 6.031578947368422e-06, "loss": 0.1133, "step": 430 }, { "epoch": 15.93, "eval_loss": 0.6038104295730591, "eval_runtime": 333.9842, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 44.13965087281795, "step": 430 }, { "epoch": 16.11, "learning_rate": 5.978947368421053e-06, "loss": 0.0515, "step": 435 }, { "epoch": 16.11, "eval_loss": 0.6272233724594116, "eval_runtime": 331.7109, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.121, "eval_wer": 43.53402208763805, "step": 435 }, { "epoch": 16.3, "learning_rate": 5.9263157894736844e-06, "loss": 0.0709, "step": 440 }, { "epoch": 16.3, "eval_loss": 0.6419104337692261, "eval_runtime": 336.7239, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.119, "eval_wer": 45.92091200570004, "step": 440 }, { "epoch": 16.48, "learning_rate": 5.873684210526316e-06, "loss": 0.0978, "step": 445 }, { "epoch": 16.48, "eval_loss": 0.6428846120834351, "eval_runtime": 334.722, "eval_samples_per_second": 0.239, "eval_steps_per_second": 0.12, "eval_wer": 46.59779123619522, "step": 445 }, { "epoch": 16.67, "learning_rate": 5.8210526315789486e-06, "loss": 0.052, "step": 450 }, { "epoch": 16.67, "eval_loss": 0.6158877611160278, "eval_runtime": 340.1528, "eval_samples_per_second": 0.235, "eval_steps_per_second": 0.118, "eval_wer": 45.2084075525472, "step": 450 }, { "epoch": 16.85, "learning_rate": 5.76842105263158e-06, "loss": 0.0508, "step": 455 }, { "epoch": 16.85, "eval_loss": 0.631538987159729, "eval_runtime": 334.4872, "eval_samples_per_second": 0.239, "eval_steps_per_second": 0.12, "eval_wer": 45.24403277520484, "step": 455 }, { "epoch": 17.04, "learning_rate": 5.715789473684211e-06, "loss": 0.0453, "step": 460 }, { "epoch": 17.04, "eval_loss": 0.6536934971809387, "eval_runtime": 327.9543, "eval_samples_per_second": 0.244, "eval_steps_per_second": 0.122, "eval_wer": 48.87780548628429, "step": 460 }, { "epoch": 17.22, "learning_rate": 5.663157894736843e-06, "loss": 0.0454, "step": 465 }, { "epoch": 17.22, "eval_loss": 0.6028529405593872, "eval_runtime": 337.2385, "eval_samples_per_second": 0.237, "eval_steps_per_second": 0.119, "eval_wer": 48.770929818311366, "step": 465 }, { "epoch": 17.41, "learning_rate": 5.610526315789474e-06, "loss": 0.0662, "step": 470 }, { "epoch": 17.41, "eval_loss": 0.5731960535049438, "eval_runtime": 331.8614, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.121, "eval_wer": 45.885286783042396, "step": 470 }, { "epoch": 17.59, "learning_rate": 5.557894736842105e-06, "loss": 0.0365, "step": 475 }, { "epoch": 17.59, "eval_loss": 0.6006522178649902, "eval_runtime": 320.1815, "eval_samples_per_second": 0.25, "eval_steps_per_second": 0.125, "eval_wer": 43.89027431421446, "step": 475 }, { "epoch": 17.78, "learning_rate": 5.505263157894737e-06, "loss": 0.0728, "step": 480 }, { "epoch": 17.78, "eval_loss": 0.6429422497749329, "eval_runtime": 333.1015, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.12, "eval_wer": 44.67402921268258, "step": 480 }, { "epoch": 17.96, "learning_rate": 5.452631578947369e-06, "loss": 0.0334, "step": 485 }, { "epoch": 17.96, "eval_loss": 0.648144543170929, "eval_runtime": 320.2927, "eval_samples_per_second": 0.25, "eval_steps_per_second": 0.125, "eval_wer": 41.11150694691842, "step": 485 } ], "max_steps": 1000, "num_train_epochs": 38, "total_flos": 1.6475807121408e+19, "trial_name": null, "trial_params": null }