{ "best_metric": 7.136237256719184, "best_model_checkpoint": "CHECKPOINTS/checkpoint-59160", "epoch": 1.0000319284802044, "global_step": 62640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9310344827586207e-06, "loss": 2.0236, "step": 116 }, { "epoch": 0.0, "learning_rate": 3.931034482758621e-06, "loss": 0.5343, "step": 232 }, { "epoch": 0.01, "learning_rate": 5.9310344827586205e-06, "loss": 0.4203, "step": 348 }, { "epoch": 0.01, "learning_rate": 7.93103448275862e-06, "loss": 0.3676, "step": 464 }, { "epoch": 0.01, "learning_rate": 9.931034482758622e-06, "loss": 0.3277, "step": 580 }, { "epoch": 0.01, "eval_loss": 0.42336705327033997, "eval_runtime": 820.7641, "eval_samples_per_second": 2.088, "eval_steps_per_second": 0.262, "eval_wer": 58.4491813407476, "step": 580 }, { "epoch": 0.01, "learning_rate": 9.981952948759265e-06, "loss": 0.3055, "step": 696 }, { "epoch": 0.01, "learning_rate": 9.963261359974218e-06, "loss": 0.2803, "step": 812 }, { "epoch": 0.01, "learning_rate": 9.944569771189173e-06, "loss": 0.2546, "step": 928 }, { "epoch": 0.02, "learning_rate": 9.925878182404126e-06, "loss": 0.2365, "step": 1044 }, { "epoch": 0.02, "learning_rate": 9.90718659361908e-06, "loss": 0.2238, "step": 1160 }, { "epoch": 0.02, "eval_loss": 0.3460919260978699, "eval_runtime": 799.3208, "eval_samples_per_second": 2.144, "eval_steps_per_second": 0.269, "eval_wer": 42.02450828956853, "step": 1160 }, { "epoch": 0.02, "learning_rate": 9.888495004834032e-06, "loss": 0.2156, "step": 1276 }, { "epoch": 0.02, "learning_rate": 9.869803416048985e-06, "loss": 0.2058, "step": 1392 }, { "epoch": 0.02, "learning_rate": 9.85111182726394e-06, "loss": 0.1976, "step": 1508 }, { "epoch": 0.03, "learning_rate": 9.832420238478893e-06, "loss": 0.1788, "step": 1624 }, { "epoch": 0.03, "learning_rate": 9.813728649693846e-06, "loss": 0.1861, "step": 1740 }, { "epoch": 0.03, "eval_loss": 0.2978155016899109, "eval_runtime": 789.7073, "eval_samples_per_second": 2.17, "eval_steps_per_second": 0.272, "eval_wer": 33.662856554422824, "step": 1740 }, { "epoch": 0.03, "learning_rate": 9.795037060908799e-06, "loss": 0.174, "step": 1856 }, { "epoch": 0.03, "learning_rate": 9.776345472123752e-06, "loss": 0.1739, "step": 1972 }, { "epoch": 0.03, "learning_rate": 9.757653883338705e-06, "loss": 0.1683, "step": 2088 }, { "epoch": 0.04, "learning_rate": 9.738962294553658e-06, "loss": 0.1606, "step": 2204 }, { "epoch": 0.04, "learning_rate": 9.720270705768611e-06, "loss": 0.1572, "step": 2320 }, { "epoch": 0.04, "eval_loss": 0.27300673723220825, "eval_runtime": 786.9048, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 34.06446298012563, "step": 2320 }, { "epoch": 0.04, "learning_rate": 9.701579116983564e-06, "loss": 0.1498, "step": 2436 }, { "epoch": 0.04, "learning_rate": 9.682887528198517e-06, "loss": 0.1475, "step": 2552 }, { "epoch": 0.04, "learning_rate": 9.664195939413472e-06, "loss": 0.1421, "step": 2668 }, { "epoch": 0.04, "learning_rate": 9.645504350628425e-06, "loss": 0.1416, "step": 2784 }, { "epoch": 0.05, "learning_rate": 9.626812761843378e-06, "loss": 0.1416, "step": 2900 }, { "epoch": 0.05, "eval_loss": 0.2491539865732193, "eval_runtime": 790.6324, "eval_samples_per_second": 2.168, "eval_steps_per_second": 0.272, "eval_wer": 33.508392544537124, "step": 2900 }, { "epoch": 0.05, "learning_rate": 9.608121173058331e-06, "loss": 0.1359, "step": 3016 }, { "epoch": 0.05, "learning_rate": 9.589429584273284e-06, "loss": 0.1324, "step": 3132 }, { "epoch": 0.05, "learning_rate": 9.570737995488239e-06, "loss": 0.1388, "step": 3248 }, { "epoch": 0.05, "learning_rate": 9.552046406703192e-06, "loss": 0.1353, "step": 3364 }, { "epoch": 0.06, "learning_rate": 9.533354817918145e-06, "loss": 0.1293, "step": 3480 }, { "epoch": 0.06, "eval_loss": 0.24061298370361328, "eval_runtime": 785.4078, "eval_samples_per_second": 2.182, "eval_steps_per_second": 0.274, "eval_wer": 32.01524044897539, "step": 3480 }, { "epoch": 0.06, "learning_rate": 9.514663229133098e-06, "loss": 0.1293, "step": 3596 }, { "epoch": 0.06, "learning_rate": 9.49597164034805e-06, "loss": 0.1234, "step": 3712 }, { "epoch": 0.06, "learning_rate": 9.477280051563005e-06, "loss": 0.1165, "step": 3828 }, { "epoch": 0.06, "learning_rate": 9.458588462777958e-06, "loss": 0.1221, "step": 3944 }, { "epoch": 0.06, "learning_rate": 9.439896873992912e-06, "loss": 0.1212, "step": 4060 }, { "epoch": 0.06, "eval_loss": 0.24251143634319305, "eval_runtime": 789.4621, "eval_samples_per_second": 2.171, "eval_steps_per_second": 0.272, "eval_wer": 31.366491607455465, "step": 4060 }, { "epoch": 0.07, "learning_rate": 9.421205285207865e-06, "loss": 0.121, "step": 4176 }, { "epoch": 0.07, "learning_rate": 9.402513696422818e-06, "loss": 0.1118, "step": 4292 }, { "epoch": 0.07, "learning_rate": 9.38382210763777e-06, "loss": 0.1093, "step": 4408 }, { "epoch": 0.07, "learning_rate": 9.365130518852724e-06, "loss": 0.1155, "step": 4524 }, { "epoch": 0.07, "learning_rate": 9.346438930067677e-06, "loss": 0.1102, "step": 4640 }, { "epoch": 0.07, "eval_loss": 0.22580939531326294, "eval_runtime": 787.0585, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 30.810421171866953, "step": 4640 }, { "epoch": 0.08, "learning_rate": 9.32774734128263e-06, "loss": 0.1057, "step": 4756 }, { "epoch": 0.08, "learning_rate": 9.309055752497583e-06, "loss": 0.1053, "step": 4872 }, { "epoch": 0.08, "learning_rate": 9.290364163712537e-06, "loss": 0.1007, "step": 4988 }, { "epoch": 0.08, "learning_rate": 9.27167257492749e-06, "loss": 0.1076, "step": 5104 }, { "epoch": 0.08, "learning_rate": 9.252980986142443e-06, "loss": 0.1058, "step": 5220 }, { "epoch": 0.08, "eval_loss": 0.21304036676883698, "eval_runtime": 789.6831, "eval_samples_per_second": 2.17, "eval_steps_per_second": 0.272, "eval_wer": 27.66965297085779, "step": 5220 }, { "epoch": 0.09, "learning_rate": 9.234289397357396e-06, "loss": 0.1027, "step": 5336 }, { "epoch": 0.09, "learning_rate": 9.21559780857235e-06, "loss": 0.1013, "step": 5452 }, { "epoch": 0.09, "learning_rate": 9.196906219787304e-06, "loss": 0.1011, "step": 5568 }, { "epoch": 0.09, "learning_rate": 9.178214631002257e-06, "loss": 0.0987, "step": 5684 }, { "epoch": 0.09, "learning_rate": 9.15952304221721e-06, "loss": 0.0981, "step": 5800 }, { "epoch": 0.09, "eval_loss": 0.20962001383304596, "eval_runtime": 788.0578, "eval_samples_per_second": 2.175, "eval_steps_per_second": 0.273, "eval_wer": 30.151374729687987, "step": 5800 }, { "epoch": 0.09, "learning_rate": 9.140831453432163e-06, "loss": 0.0984, "step": 5916 }, { "epoch": 0.1, "learning_rate": 9.122139864647116e-06, "loss": 0.0928, "step": 6032 }, { "epoch": 0.1, "learning_rate": 9.10344827586207e-06, "loss": 0.095, "step": 6148 }, { "epoch": 0.1, "learning_rate": 9.084756687077024e-06, "loss": 0.0988, "step": 6264 }, { "epoch": 0.1, "learning_rate": 9.066065098291977e-06, "loss": 0.0987, "step": 6380 }, { "epoch": 0.1, "eval_loss": 0.2030366212129593, "eval_runtime": 791.7115, "eval_samples_per_second": 2.165, "eval_steps_per_second": 0.272, "eval_wer": 29.23488827103285, "step": 6380 }, { "epoch": 0.1, "learning_rate": 9.04737350950693e-06, "loss": 0.0889, "step": 6496 }, { "epoch": 0.11, "learning_rate": 9.028681920721883e-06, "loss": 0.0866, "step": 6612 }, { "epoch": 0.11, "learning_rate": 9.009990331936836e-06, "loss": 0.088, "step": 6728 }, { "epoch": 0.11, "learning_rate": 8.991298743151789e-06, "loss": 0.0886, "step": 6844 }, { "epoch": 0.11, "learning_rate": 8.972607154366742e-06, "loss": 0.0879, "step": 6960 }, { "epoch": 0.11, "eval_loss": 0.2006106823682785, "eval_runtime": 788.215, "eval_samples_per_second": 2.175, "eval_steps_per_second": 0.273, "eval_wer": 26.814952116156938, "step": 6960 }, { "epoch": 0.11, "learning_rate": 8.953915565581695e-06, "loss": 0.0865, "step": 7076 }, { "epoch": 0.11, "learning_rate": 8.935223976796648e-06, "loss": 0.0881, "step": 7192 }, { "epoch": 0.12, "learning_rate": 8.916532388011603e-06, "loss": 0.0872, "step": 7308 }, { "epoch": 0.12, "learning_rate": 8.897840799226556e-06, "loss": 0.0848, "step": 7424 }, { "epoch": 0.12, "learning_rate": 8.879149210441509e-06, "loss": 0.0817, "step": 7540 }, { "epoch": 0.12, "eval_loss": 0.20483049750328064, "eval_runtime": 791.5631, "eval_samples_per_second": 2.165, "eval_steps_per_second": 0.272, "eval_wer": 28.89506744928432, "step": 7540 }, { "epoch": 0.12, "learning_rate": 8.860457621656462e-06, "loss": 0.0831, "step": 7656 }, { "epoch": 0.12, "learning_rate": 8.841766032871415e-06, "loss": 0.0847, "step": 7772 }, { "epoch": 0.13, "learning_rate": 8.82307444408637e-06, "loss": 0.0864, "step": 7888 }, { "epoch": 0.13, "learning_rate": 8.804382855301323e-06, "loss": 0.0812, "step": 8004 }, { "epoch": 0.13, "learning_rate": 8.785691266516276e-06, "loss": 0.0806, "step": 8120 }, { "epoch": 0.13, "eval_loss": 0.19857698678970337, "eval_runtime": 798.8422, "eval_samples_per_second": 2.146, "eval_steps_per_second": 0.269, "eval_wer": 29.821851508598495, "step": 8120 }, { "epoch": 0.13, "learning_rate": 8.767160812117308e-06, "loss": 0.0772, "step": 8236 }, { "epoch": 0.13, "learning_rate": 8.74846922333226e-06, "loss": 0.082, "step": 8352 }, { "epoch": 0.14, "learning_rate": 8.729777634547214e-06, "loss": 0.0782, "step": 8468 }, { "epoch": 0.14, "learning_rate": 8.711086045762167e-06, "loss": 0.0797, "step": 8584 }, { "epoch": 0.14, "learning_rate": 8.69239445697712e-06, "loss": 0.0766, "step": 8700 }, { "epoch": 0.14, "eval_loss": 0.19560863077640533, "eval_runtime": 782.2802, "eval_samples_per_second": 2.191, "eval_steps_per_second": 0.275, "eval_wer": 28.998043455874782, "step": 8700 }, { "epoch": 0.14, "learning_rate": 8.673702868192073e-06, "loss": 0.0811, "step": 8816 }, { "epoch": 0.14, "learning_rate": 8.655011279407026e-06, "loss": 0.0754, "step": 8932 }, { "epoch": 0.14, "learning_rate": 8.636319690621979e-06, "loss": 0.078, "step": 9048 }, { "epoch": 0.15, "learning_rate": 8.617628101836932e-06, "loss": 0.0799, "step": 9164 }, { "epoch": 0.15, "learning_rate": 8.598936513051886e-06, "loss": 0.0758, "step": 9280 }, { "epoch": 0.15, "eval_loss": 0.1869840919971466, "eval_runtime": 788.0306, "eval_samples_per_second": 2.175, "eval_steps_per_second": 0.273, "eval_wer": 27.031201729996912, "step": 9280 }, { "epoch": 0.15, "learning_rate": 8.58024492426684e-06, "loss": 0.0687, "step": 9396 }, { "epoch": 0.15, "learning_rate": 8.561553335481792e-06, "loss": 0.0758, "step": 9512 }, { "epoch": 0.15, "learning_rate": 8.542861746696746e-06, "loss": 0.0739, "step": 9628 }, { "epoch": 0.16, "learning_rate": 8.524170157911699e-06, "loss": 0.0735, "step": 9744 }, { "epoch": 0.16, "learning_rate": 8.505478569126653e-06, "loss": 0.0749, "step": 9860 }, { "epoch": 0.16, "eval_loss": 0.18276962637901306, "eval_runtime": 787.0753, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 29.63649469673566, "step": 9860 }, { "epoch": 0.16, "learning_rate": 8.486786980341606e-06, "loss": 0.0742, "step": 9976 }, { "epoch": 0.16, "learning_rate": 8.46809539155656e-06, "loss": 0.0702, "step": 10092 }, { "epoch": 0.16, "learning_rate": 8.449403802771512e-06, "loss": 0.0693, "step": 10208 }, { "epoch": 0.16, "learning_rate": 8.430712213986465e-06, "loss": 0.0725, "step": 10324 }, { "epoch": 0.17, "learning_rate": 8.412181759587497e-06, "loss": 0.0713, "step": 10440 }, { "epoch": 0.17, "eval_loss": 0.18643251061439514, "eval_runtime": 785.1719, "eval_samples_per_second": 2.183, "eval_steps_per_second": 0.274, "eval_wer": 25.98084646277417, "step": 10440 }, { "epoch": 0.17, "learning_rate": 8.39349017080245e-06, "loss": 0.0734, "step": 10556 }, { "epoch": 0.17, "learning_rate": 8.374798582017403e-06, "loss": 0.072, "step": 10672 }, { "epoch": 0.17, "learning_rate": 8.356106993232356e-06, "loss": 0.0704, "step": 10788 }, { "epoch": 0.17, "learning_rate": 8.33741540444731e-06, "loss": 0.0684, "step": 10904 }, { "epoch": 0.18, "learning_rate": 8.318884950048341e-06, "loss": 0.0634, "step": 11020 }, { "epoch": 0.18, "eval_loss": 0.18063540756702423, "eval_runtime": 785.7442, "eval_samples_per_second": 2.181, "eval_steps_per_second": 0.274, "eval_wer": 26.681083307589333, "step": 11020 }, { "epoch": 0.18, "learning_rate": 8.300193361263294e-06, "loss": 0.0681, "step": 11136 }, { "epoch": 0.18, "learning_rate": 8.281501772478247e-06, "loss": 0.0677, "step": 11252 }, { "epoch": 0.18, "learning_rate": 8.2628101836932e-06, "loss": 0.0701, "step": 11368 }, { "epoch": 0.18, "learning_rate": 8.244118594908153e-06, "loss": 0.0648, "step": 11484 }, { "epoch": 0.19, "learning_rate": 8.225427006123108e-06, "loss": 0.0682, "step": 11600 }, { "epoch": 0.19, "eval_loss": 0.1769612729549408, "eval_runtime": 783.3099, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.274, "eval_wer": 26.71197610956647, "step": 11600 }, { "epoch": 0.19, "learning_rate": 8.20673541733806e-06, "loss": 0.065, "step": 11716 }, { "epoch": 0.19, "learning_rate": 8.188043828553014e-06, "loss": 0.0648, "step": 11832 }, { "epoch": 0.19, "learning_rate": 8.169352239767967e-06, "loss": 0.0659, "step": 11948 }, { "epoch": 0.19, "learning_rate": 8.15066065098292e-06, "loss": 0.0672, "step": 12064 }, { "epoch": 0.19, "learning_rate": 8.131969062197875e-06, "loss": 0.0607, "step": 12180 }, { "epoch": 0.19, "eval_loss": 0.1819377839565277, "eval_runtime": 783.0783, "eval_samples_per_second": 2.189, "eval_steps_per_second": 0.275, "eval_wer": 26.990011327360726, "step": 12180 }, { "epoch": 0.2, "learning_rate": 8.113277473412828e-06, "loss": 0.0672, "step": 12296 }, { "epoch": 0.2, "learning_rate": 8.09458588462778e-06, "loss": 0.0602, "step": 12412 }, { "epoch": 0.2, "learning_rate": 8.075894295842734e-06, "loss": 0.0638, "step": 12528 }, { "epoch": 0.2, "learning_rate": 8.057202707057687e-06, "loss": 0.0635, "step": 12644 }, { "epoch": 0.2, "learning_rate": 8.038511118272641e-06, "loss": 0.0635, "step": 12760 }, { "epoch": 0.2, "eval_loss": 0.17338888347148895, "eval_runtime": 785.414, "eval_samples_per_second": 2.182, "eval_steps_per_second": 0.274, "eval_wer": 26.578107300998866, "step": 12760 }, { "epoch": 0.21, "learning_rate": 8.019819529487594e-06, "loss": 0.0656, "step": 12876 }, { "epoch": 0.21, "learning_rate": 8.001127940702547e-06, "loss": 0.0608, "step": 12992 }, { "epoch": 0.21, "learning_rate": 7.9824363519175e-06, "loss": 0.0588, "step": 13108 }, { "epoch": 0.21, "learning_rate": 7.963744763132453e-06, "loss": 0.058, "step": 13224 }, { "epoch": 0.21, "learning_rate": 7.945053174347406e-06, "loss": 0.061, "step": 13340 }, { "epoch": 0.21, "eval_loss": 0.16947728395462036, "eval_runtime": 785.6425, "eval_samples_per_second": 2.182, "eval_steps_per_second": 0.274, "eval_wer": 27.6490577695397, "step": 13340 }, { "epoch": 0.21, "learning_rate": 7.92636158556236e-06, "loss": 0.0579, "step": 13456 }, { "epoch": 0.22, "learning_rate": 7.907669996777313e-06, "loss": 0.0639, "step": 13572 }, { "epoch": 0.22, "learning_rate": 7.888978407992266e-06, "loss": 0.0625, "step": 13688 }, { "epoch": 0.22, "learning_rate": 7.870286819207219e-06, "loss": 0.0611, "step": 13804 }, { "epoch": 0.22, "learning_rate": 7.851595230422173e-06, "loss": 0.0623, "step": 13920 }, { "epoch": 0.22, "eval_loss": 0.16918495297431946, "eval_runtime": 781.9171, "eval_samples_per_second": 2.192, "eval_steps_per_second": 0.275, "eval_wer": 27.422510555040674, "step": 13920 }, { "epoch": 0.22, "learning_rate": 7.832903641637126e-06, "loss": 0.056, "step": 14036 }, { "epoch": 0.23, "learning_rate": 7.81421205285208e-06, "loss": 0.0563, "step": 14152 }, { "epoch": 0.23, "learning_rate": 7.795520464067032e-06, "loss": 0.054, "step": 14268 }, { "epoch": 0.23, "learning_rate": 7.776828875281985e-06, "loss": 0.0536, "step": 14384 }, { "epoch": 0.23, "learning_rate": 7.75813728649694e-06, "loss": 0.0583, "step": 14500 }, { "epoch": 0.23, "eval_loss": 0.17885711789131165, "eval_runtime": 782.2797, "eval_samples_per_second": 2.191, "eval_steps_per_second": 0.275, "eval_wer": 27.679950571516837, "step": 14500 }, { "epoch": 0.23, "learning_rate": 7.739445697711893e-06, "loss": 0.0562, "step": 14616 }, { "epoch": 0.24, "learning_rate": 7.720754108926846e-06, "loss": 0.0586, "step": 14732 }, { "epoch": 0.24, "learning_rate": 7.702062520141799e-06, "loss": 0.056, "step": 14848 }, { "epoch": 0.24, "learning_rate": 7.683370931356752e-06, "loss": 0.0508, "step": 14964 }, { "epoch": 0.24, "learning_rate": 7.664679342571705e-06, "loss": 0.0594, "step": 15080 }, { "epoch": 0.24, "eval_loss": 0.16561517119407654, "eval_runtime": 794.3578, "eval_samples_per_second": 2.158, "eval_steps_per_second": 0.271, "eval_wer": 25.12614560807332, "step": 15080 }, { "epoch": 0.24, "learning_rate": 7.645987753786658e-06, "loss": 0.0535, "step": 15196 }, { "epoch": 0.24, "learning_rate": 7.627296165001612e-06, "loss": 0.0554, "step": 15312 }, { "epoch": 0.25, "learning_rate": 7.608604576216565e-06, "loss": 0.0514, "step": 15428 }, { "epoch": 0.25, "learning_rate": 7.589912987431518e-06, "loss": 0.0537, "step": 15544 }, { "epoch": 0.25, "learning_rate": 7.571221398646472e-06, "loss": 0.0548, "step": 15660 }, { "epoch": 0.25, "eval_loss": 0.1662958413362503, "eval_runtime": 783.8667, "eval_samples_per_second": 2.187, "eval_steps_per_second": 0.274, "eval_wer": 25.90876325816085, "step": 15660 }, { "epoch": 0.25, "learning_rate": 7.552529809861425e-06, "loss": 0.0562, "step": 15776 }, { "epoch": 0.25, "learning_rate": 7.533838221076378e-06, "loss": 0.0579, "step": 15892 }, { "epoch": 0.26, "learning_rate": 7.515146632291331e-06, "loss": 0.0563, "step": 16008 }, { "epoch": 0.26, "learning_rate": 7.496455043506284e-06, "loss": 0.0508, "step": 16124 }, { "epoch": 0.26, "learning_rate": 7.477763454721239e-06, "loss": 0.0546, "step": 16240 }, { "epoch": 0.26, "eval_loss": 0.15785543620586395, "eval_runtime": 781.6726, "eval_samples_per_second": 2.193, "eval_steps_per_second": 0.275, "eval_wer": 26.289774482545567, "step": 16240 }, { "epoch": 0.26, "learning_rate": 7.459071865936192e-06, "loss": 0.0496, "step": 16356 }, { "epoch": 0.26, "learning_rate": 7.440380277151145e-06, "loss": 0.0531, "step": 16472 }, { "epoch": 0.26, "learning_rate": 7.421849822752177e-06, "loss": 0.0523, "step": 16588 }, { "epoch": 0.27, "learning_rate": 7.40315823396713e-06, "loss": 0.0565, "step": 16704 }, { "epoch": 0.27, "learning_rate": 7.384466645182083e-06, "loss": 0.0547, "step": 16820 }, { "epoch": 0.27, "eval_loss": 0.16302894055843353, "eval_runtime": 785.7904, "eval_samples_per_second": 2.181, "eval_steps_per_second": 0.274, "eval_wer": 27.020904129337865, "step": 16820 }, { "epoch": 0.27, "learning_rate": 7.365775056397036e-06, "loss": 0.0532, "step": 16936 }, { "epoch": 0.27, "learning_rate": 7.3470834676119895e-06, "loss": 0.0557, "step": 17052 }, { "epoch": 0.27, "learning_rate": 7.3283918788269426e-06, "loss": 0.0529, "step": 17168 }, { "epoch": 0.28, "learning_rate": 7.309700290041896e-06, "loss": 0.0516, "step": 17284 }, { "epoch": 0.28, "learning_rate": 7.291008701256849e-06, "loss": 0.0543, "step": 17400 }, { "epoch": 0.28, "eval_loss": 0.16456177830696106, "eval_runtime": 783.0024, "eval_samples_per_second": 2.189, "eval_steps_per_second": 0.275, "eval_wer": 26.444238492431264, "step": 17400 }, { "epoch": 0.28, "learning_rate": 7.272317112471802e-06, "loss": 0.054, "step": 17516 }, { "epoch": 0.28, "learning_rate": 7.2536255236867555e-06, "loss": 0.0515, "step": 17632 }, { "epoch": 0.28, "learning_rate": 7.2349339349017085e-06, "loss": 0.0564, "step": 17748 }, { "epoch": 0.29, "learning_rate": 7.2162423461166616e-06, "loss": 0.0523, "step": 17864 }, { "epoch": 0.29, "learning_rate": 7.197550757331615e-06, "loss": 0.0496, "step": 17980 }, { "epoch": 0.29, "eval_loss": 0.16263148188591003, "eval_runtime": 783.5956, "eval_samples_per_second": 2.187, "eval_steps_per_second": 0.274, "eval_wer": 23.96251673360107, "step": 17980 }, { "epoch": 0.29, "learning_rate": 7.1788591685465684e-06, "loss": 0.051, "step": 18096 }, { "epoch": 0.29, "learning_rate": 7.160167579761522e-06, "loss": 0.0546, "step": 18212 }, { "epoch": 0.29, "learning_rate": 7.141475990976475e-06, "loss": 0.0486, "step": 18328 }, { "epoch": 0.29, "learning_rate": 7.122784402191428e-06, "loss": 0.0502, "step": 18444 }, { "epoch": 0.3, "learning_rate": 7.104092813406381e-06, "loss": 0.0494, "step": 18560 }, { "epoch": 0.3, "eval_loss": 0.15693338215351105, "eval_runtime": 786.4458, "eval_samples_per_second": 2.179, "eval_steps_per_second": 0.273, "eval_wer": 18.69014519616929, "step": 18560 }, { "epoch": 0.3, "learning_rate": 7.085401224621334e-06, "loss": 0.0482, "step": 18676 }, { "epoch": 0.3, "learning_rate": 7.066709635836288e-06, "loss": 0.0487, "step": 18792 }, { "epoch": 0.3, "learning_rate": 7.048018047051241e-06, "loss": 0.0462, "step": 18908 }, { "epoch": 0.3, "learning_rate": 7.029326458266194e-06, "loss": 0.0488, "step": 19024 }, { "epoch": 0.31, "learning_rate": 7.010634869481147e-06, "loss": 0.0477, "step": 19140 }, { "epoch": 0.31, "eval_loss": 0.15665055811405182, "eval_runtime": 789.4798, "eval_samples_per_second": 2.171, "eval_steps_per_second": 0.272, "eval_wer": 26.176500875296053, "step": 19140 }, { "epoch": 0.31, "learning_rate": 6.9919432806961e-06, "loss": 0.0505, "step": 19256 }, { "epoch": 0.31, "learning_rate": 6.973251691911055e-06, "loss": 0.0488, "step": 19372 }, { "epoch": 0.31, "learning_rate": 6.954560103126008e-06, "loss": 0.0487, "step": 19488 }, { "epoch": 0.31, "learning_rate": 6.935868514340961e-06, "loss": 0.0454, "step": 19604 }, { "epoch": 0.31, "learning_rate": 6.917176925555914e-06, "loss": 0.0439, "step": 19720 }, { "epoch": 0.31, "eval_loss": 0.15991590917110443, "eval_runtime": 784.4279, "eval_samples_per_second": 2.185, "eval_steps_per_second": 0.274, "eval_wer": 26.7840593141798, "step": 19720 }, { "epoch": 0.32, "learning_rate": 6.898485336770867e-06, "loss": 0.0462, "step": 19836 }, { "epoch": 0.32, "learning_rate": 6.879954882371899e-06, "loss": 0.0524, "step": 19952 }, { "epoch": 0.32, "learning_rate": 6.861263293586852e-06, "loss": 0.0506, "step": 20068 }, { "epoch": 0.32, "learning_rate": 6.842571704801805e-06, "loss": 0.0461, "step": 20184 }, { "epoch": 0.32, "learning_rate": 6.823880116016759e-06, "loss": 0.0465, "step": 20300 }, { "epoch": 0.32, "eval_loss": 0.14981767535209656, "eval_runtime": 785.1354, "eval_samples_per_second": 2.183, "eval_steps_per_second": 0.274, "eval_wer": 25.692513644320876, "step": 20300 }, { "epoch": 0.33, "learning_rate": 6.805188527231712e-06, "loss": 0.0449, "step": 20416 }, { "epoch": 0.33, "learning_rate": 6.786496938446665e-06, "loss": 0.0451, "step": 20532 }, { "epoch": 0.33, "learning_rate": 6.767805349661618e-06, "loss": 0.0478, "step": 20648 }, { "epoch": 0.33, "learning_rate": 6.749113760876571e-06, "loss": 0.0431, "step": 20764 }, { "epoch": 0.33, "learning_rate": 6.730422172091526e-06, "loss": 0.0439, "step": 20880 }, { "epoch": 0.33, "eval_loss": 0.1557285338640213, "eval_runtime": 784.9439, "eval_samples_per_second": 2.184, "eval_steps_per_second": 0.274, "eval_wer": 26.403048089795078, "step": 20880 }, { "epoch": 0.34, "learning_rate": 6.711730583306479e-06, "loss": 0.0485, "step": 20996 }, { "epoch": 0.34, "learning_rate": 6.693038994521432e-06, "loss": 0.0435, "step": 21112 }, { "epoch": 0.34, "learning_rate": 6.674347405736385e-06, "loss": 0.0389, "step": 21228 }, { "epoch": 0.34, "learning_rate": 6.655655816951338e-06, "loss": 0.0444, "step": 21344 }, { "epoch": 0.34, "learning_rate": 6.636964228166292e-06, "loss": 0.0438, "step": 21460 }, { "epoch": 0.34, "eval_loss": 0.15771810710430145, "eval_runtime": 783.9278, "eval_samples_per_second": 2.186, "eval_steps_per_second": 0.274, "eval_wer": 26.51632169704459, "step": 21460 }, { "epoch": 0.34, "learning_rate": 6.618272639381245e-06, "loss": 0.0439, "step": 21576 }, { "epoch": 0.35, "learning_rate": 6.599581050596198e-06, "loss": 0.0458, "step": 21692 }, { "epoch": 0.35, "learning_rate": 6.580889461811151e-06, "loss": 0.0472, "step": 21808 }, { "epoch": 0.35, "learning_rate": 6.562197873026104e-06, "loss": 0.0433, "step": 21924 }, { "epoch": 0.35, "learning_rate": 6.5435062842410576e-06, "loss": 0.0443, "step": 22040 }, { "epoch": 0.35, "eval_loss": 0.14860232174396515, "eval_runtime": 783.2253, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.275, "eval_wer": 25.96025126145608, "step": 22040 }, { "epoch": 0.35, "learning_rate": 6.5249758298420885e-06, "loss": 0.0409, "step": 22156 }, { "epoch": 0.36, "learning_rate": 6.506284241057042e-06, "loss": 0.0402, "step": 22272 }, { "epoch": 0.36, "learning_rate": 6.487592652271995e-06, "loss": 0.0432, "step": 22388 }, { "epoch": 0.36, "learning_rate": 6.468901063486948e-06, "loss": 0.0407, "step": 22504 }, { "epoch": 0.36, "learning_rate": 6.450209474701901e-06, "loss": 0.0431, "step": 22620 }, { "epoch": 0.36, "eval_loss": 0.15237173438072205, "eval_runtime": 782.7057, "eval_samples_per_second": 2.19, "eval_steps_per_second": 0.275, "eval_wer": 25.898465657501802, "step": 22620 }, { "epoch": 0.36, "learning_rate": 6.4315178859168544e-06, "loss": 0.0444, "step": 22736 }, { "epoch": 0.36, "learning_rate": 6.412826297131809e-06, "loss": 0.0436, "step": 22852 }, { "epoch": 0.37, "learning_rate": 6.394134708346762e-06, "loss": 0.0448, "step": 22968 }, { "epoch": 0.37, "learning_rate": 6.375443119561715e-06, "loss": 0.04, "step": 23084 }, { "epoch": 0.37, "learning_rate": 6.356751530776668e-06, "loss": 0.0406, "step": 23200 }, { "epoch": 0.37, "eval_loss": 0.15666086971759796, "eval_runtime": 787.7061, "eval_samples_per_second": 2.176, "eval_steps_per_second": 0.273, "eval_wer": 26.33096488518175, "step": 23200 }, { "epoch": 0.37, "learning_rate": 6.338059941991621e-06, "loss": 0.0436, "step": 23316 }, { "epoch": 0.37, "learning_rate": 6.319368353206575e-06, "loss": 0.0395, "step": 23432 }, { "epoch": 0.38, "learning_rate": 6.300676764421528e-06, "loss": 0.0368, "step": 23548 }, { "epoch": 0.38, "learning_rate": 6.281985175636481e-06, "loss": 0.0438, "step": 23664 }, { "epoch": 0.38, "learning_rate": 6.263293586851434e-06, "loss": 0.0406, "step": 23780 }, { "epoch": 0.38, "eval_loss": 0.15179598331451416, "eval_runtime": 785.8287, "eval_samples_per_second": 2.181, "eval_steps_per_second": 0.274, "eval_wer": 24.786324786324787, "step": 23780 }, { "epoch": 0.38, "learning_rate": 6.244601998066387e-06, "loss": 0.0444, "step": 23896 }, { "epoch": 0.38, "learning_rate": 6.225910409281342e-06, "loss": 0.0415, "step": 24012 }, { "epoch": 0.39, "learning_rate": 6.207218820496295e-06, "loss": 0.0454, "step": 24128 }, { "epoch": 0.39, "learning_rate": 6.188527231711248e-06, "loss": 0.0414, "step": 24244 }, { "epoch": 0.39, "learning_rate": 6.169835642926201e-06, "loss": 0.0405, "step": 24360 }, { "epoch": 0.39, "eval_loss": 0.14717231690883636, "eval_runtime": 783.1318, "eval_samples_per_second": 2.189, "eval_steps_per_second": 0.275, "eval_wer": 19.77139326536917, "step": 24360 }, { "epoch": 0.39, "learning_rate": 6.151305188527233e-06, "loss": 0.0419, "step": 24476 }, { "epoch": 0.39, "learning_rate": 6.132613599742186e-06, "loss": 0.0397, "step": 24592 }, { "epoch": 0.39, "learning_rate": 6.113922010957139e-06, "loss": 0.0389, "step": 24708 }, { "epoch": 0.4, "learning_rate": 6.095230422172092e-06, "loss": 0.0374, "step": 24824 }, { "epoch": 0.4, "learning_rate": 6.076538833387046e-06, "loss": 0.0382, "step": 24940 }, { "epoch": 0.4, "eval_loss": 0.14440029859542847, "eval_runtime": 783.3674, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.274, "eval_wer": 20.656986922047164, "step": 24940 }, { "epoch": 0.4, "learning_rate": 6.057847244601999e-06, "loss": 0.0416, "step": 25056 }, { "epoch": 0.4, "learning_rate": 6.039155655816952e-06, "loss": 0.0422, "step": 25172 }, { "epoch": 0.4, "learning_rate": 6.020464067031905e-06, "loss": 0.0369, "step": 25288 }, { "epoch": 0.41, "learning_rate": 6.001772478246858e-06, "loss": 0.0414, "step": 25404 }, { "epoch": 0.41, "learning_rate": 5.983080889461812e-06, "loss": 0.0389, "step": 25520 }, { "epoch": 0.41, "eval_loss": 0.1415482610464096, "eval_runtime": 783.7026, "eval_samples_per_second": 2.187, "eval_steps_per_second": 0.274, "eval_wer": 21.944187004427967, "step": 25520 }, { "epoch": 0.41, "learning_rate": 5.964389300676765e-06, "loss": 0.0416, "step": 25636 }, { "epoch": 0.41, "learning_rate": 5.945697711891718e-06, "loss": 0.0386, "step": 25752 }, { "epoch": 0.41, "learning_rate": 5.927006123106671e-06, "loss": 0.0373, "step": 25868 }, { "epoch": 0.41, "learning_rate": 5.9083145343216254e-06, "loss": 0.0407, "step": 25984 }, { "epoch": 0.42, "learning_rate": 5.8896229455365785e-06, "loss": 0.0383, "step": 26100 }, { "epoch": 0.42, "eval_loss": 0.14374086260795593, "eval_runtime": 787.1, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 21.408711770157556, "step": 26100 }, { "epoch": 0.42, "learning_rate": 5.8709313567515315e-06, "loss": 0.0405, "step": 26216 }, { "epoch": 0.42, "learning_rate": 5.8522397679664845e-06, "loss": 0.0414, "step": 26332 }, { "epoch": 0.42, "learning_rate": 5.8335481791814375e-06, "loss": 0.0398, "step": 26448 }, { "epoch": 0.42, "learning_rate": 5.815017724782469e-06, "loss": 0.0421, "step": 26564 }, { "epoch": 0.43, "learning_rate": 5.796326135997422e-06, "loss": 0.036, "step": 26680 }, { "epoch": 0.43, "eval_loss": 0.14346691966056824, "eval_runtime": 787.9907, "eval_samples_per_second": 2.175, "eval_steps_per_second": 0.273, "eval_wer": 21.50139017608897, "step": 26680 }, { "epoch": 0.43, "learning_rate": 5.777634547212375e-06, "loss": 0.0383, "step": 26796 }, { "epoch": 0.43, "learning_rate": 5.758942958427329e-06, "loss": 0.0399, "step": 26912 }, { "epoch": 0.43, "learning_rate": 5.740251369642282e-06, "loss": 0.0368, "step": 27028 }, { "epoch": 0.43, "learning_rate": 5.721559780857235e-06, "loss": 0.0386, "step": 27144 }, { "epoch": 0.44, "learning_rate": 5.702868192072188e-06, "loss": 0.0357, "step": 27260 }, { "epoch": 0.44, "eval_loss": 0.1420992761850357, "eval_runtime": 784.1914, "eval_samples_per_second": 2.186, "eval_steps_per_second": 0.274, "eval_wer": 23.663886314488725, "step": 27260 }, { "epoch": 0.44, "learning_rate": 5.684176603287141e-06, "loss": 0.0395, "step": 27376 }, { "epoch": 0.44, "learning_rate": 5.665485014502096e-06, "loss": 0.0348, "step": 27492 }, { "epoch": 0.44, "learning_rate": 5.646793425717049e-06, "loss": 0.0361, "step": 27608 }, { "epoch": 0.44, "learning_rate": 5.628101836932002e-06, "loss": 0.0369, "step": 27724 }, { "epoch": 0.44, "learning_rate": 5.609410248146955e-06, "loss": 0.0369, "step": 27840 }, { "epoch": 0.44, "eval_loss": 0.14183476567268372, "eval_runtime": 784.9371, "eval_samples_per_second": 2.184, "eval_steps_per_second": 0.274, "eval_wer": 23.911028730305837, "step": 27840 }, { "epoch": 0.45, "learning_rate": 5.590718659361908e-06, "loss": 0.0372, "step": 27956 }, { "epoch": 0.45, "learning_rate": 5.572027070576862e-06, "loss": 0.0355, "step": 28072 }, { "epoch": 0.45, "learning_rate": 5.553335481791815e-06, "loss": 0.0354, "step": 28188 }, { "epoch": 0.45, "learning_rate": 5.534643893006768e-06, "loss": 0.0342, "step": 28304 }, { "epoch": 0.45, "learning_rate": 5.515952304221721e-06, "loss": 0.035, "step": 28420 }, { "epoch": 0.45, "eval_loss": 0.13896532356739044, "eval_runtime": 785.0487, "eval_samples_per_second": 2.183, "eval_steps_per_second": 0.274, "eval_wer": 24.88930079291525, "step": 28420 }, { "epoch": 0.46, "learning_rate": 5.497260715436674e-06, "loss": 0.0338, "step": 28536 }, { "epoch": 0.46, "learning_rate": 5.478569126651628e-06, "loss": 0.0364, "step": 28652 }, { "epoch": 0.46, "learning_rate": 5.459877537866582e-06, "loss": 0.0368, "step": 28768 }, { "epoch": 0.46, "learning_rate": 5.441185949081535e-06, "loss": 0.0328, "step": 28884 }, { "epoch": 0.46, "learning_rate": 5.422494360296488e-06, "loss": 0.0368, "step": 29000 }, { "epoch": 0.46, "eval_loss": 0.14057199656963348, "eval_runtime": 793.8154, "eval_samples_per_second": 2.159, "eval_steps_per_second": 0.271, "eval_wer": 23.334363093399237, "step": 29000 }, { "epoch": 0.46, "learning_rate": 5.403802771511441e-06, "loss": 0.0366, "step": 29116 }, { "epoch": 0.47, "learning_rate": 5.385111182726395e-06, "loss": 0.0336, "step": 29232 }, { "epoch": 0.47, "learning_rate": 5.366419593941348e-06, "loss": 0.0343, "step": 29348 }, { "epoch": 0.47, "learning_rate": 5.347728005156301e-06, "loss": 0.0334, "step": 29464 }, { "epoch": 0.47, "learning_rate": 5.329036416371254e-06, "loss": 0.0323, "step": 29580 }, { "epoch": 0.47, "eval_loss": 0.1342051774263382, "eval_runtime": 783.5326, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.274, "eval_wer": 21.975079806405105, "step": 29580 }, { "epoch": 0.47, "learning_rate": 5.310344827586207e-06, "loss": 0.0349, "step": 29696 }, { "epoch": 0.48, "learning_rate": 5.291653238801161e-06, "loss": 0.0311, "step": 29812 }, { "epoch": 0.48, "learning_rate": 5.272961650016114e-06, "loss": 0.0322, "step": 29928 }, { "epoch": 0.48, "learning_rate": 5.254270061231067e-06, "loss": 0.0341, "step": 30044 }, { "epoch": 0.48, "learning_rate": 5.2368675475346446e-06, "loss": 0.1425, "step": 30160 }, { "epoch": 0.48, "eval_loss": 0.1378883421421051, "eval_runtime": 787.4376, "eval_samples_per_second": 2.177, "eval_steps_per_second": 0.273, "eval_wer": 15.343424981979197, "step": 30160 }, { "epoch": 0.48, "learning_rate": 5.218175958749598e-06, "loss": 0.0322, "step": 30276 }, { "epoch": 0.49, "learning_rate": 5.1994843699645515e-06, "loss": 0.0351, "step": 30392 }, { "epoch": 0.49, "learning_rate": 5.1807927811795045e-06, "loss": 0.0362, "step": 30508 }, { "epoch": 0.49, "learning_rate": 5.1621011923944575e-06, "loss": 0.0333, "step": 30624 }, { "epoch": 0.49, "learning_rate": 5.1434096036094105e-06, "loss": 0.0324, "step": 30740 }, { "epoch": 0.49, "eval_loss": 0.1346246302127838, "eval_runtime": 783.3435, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.274, "eval_wer": 10.513850272886417, "step": 30740 }, { "epoch": 0.49, "learning_rate": 5.1247180148243635e-06, "loss": 0.035, "step": 30856 }, { "epoch": 0.49, "learning_rate": 5.106026426039317e-06, "loss": 0.0353, "step": 30972 }, { "epoch": 0.5, "learning_rate": 5.0873348372542704e-06, "loss": 0.0315, "step": 31088 }, { "epoch": 0.5, "learning_rate": 5.0686432484692235e-06, "loss": 0.0355, "step": 31204 }, { "epoch": 0.5, "learning_rate": 5.0499516596841765e-06, "loss": 0.0303, "step": 31320 }, { "epoch": 0.5, "eval_loss": 0.13406488299369812, "eval_runtime": 785.1703, "eval_samples_per_second": 2.183, "eval_steps_per_second": 0.274, "eval_wer": 10.24611265575121, "step": 31320 }, { "epoch": 0.5, "learning_rate": 5.03126007089913e-06, "loss": 0.0326, "step": 31436 }, { "epoch": 0.5, "learning_rate": 5.012568482114084e-06, "loss": 0.0303, "step": 31552 }, { "epoch": 0.51, "learning_rate": 4.993876893329036e-06, "loss": 0.0342, "step": 31668 }, { "epoch": 0.51, "learning_rate": 4.97518530454399e-06, "loss": 0.0297, "step": 31784 }, { "epoch": 0.51, "learning_rate": 4.956493715758943e-06, "loss": 0.0298, "step": 31900 }, { "epoch": 0.51, "eval_loss": 0.13156923651695251, "eval_runtime": 788.5054, "eval_samples_per_second": 2.174, "eval_steps_per_second": 0.273, "eval_wer": 9.535578210277006, "step": 31900 }, { "epoch": 0.51, "learning_rate": 4.937802126973897e-06, "loss": 0.0333, "step": 32016 }, { "epoch": 0.51, "learning_rate": 4.91911053818885e-06, "loss": 0.0314, "step": 32132 }, { "epoch": 0.51, "learning_rate": 4.900418949403803e-06, "loss": 0.0303, "step": 32248 }, { "epoch": 0.52, "learning_rate": 4.881727360618756e-06, "loss": 0.0332, "step": 32364 }, { "epoch": 0.52, "learning_rate": 4.863035771833709e-06, "loss": 0.0308, "step": 32480 }, { "epoch": 0.52, "eval_loss": 0.13265329599380493, "eval_runtime": 782.9581, "eval_samples_per_second": 2.189, "eval_steps_per_second": 0.275, "eval_wer": 9.514983008958913, "step": 32480 }, { "epoch": 0.52, "learning_rate": 4.844344183048663e-06, "loss": 0.033, "step": 32596 }, { "epoch": 0.52, "learning_rate": 4.825652594263616e-06, "loss": 0.0328, "step": 32712 }, { "epoch": 0.52, "learning_rate": 4.806961005478569e-06, "loss": 0.0303, "step": 32828 }, { "epoch": 0.53, "learning_rate": 4.788269416693523e-06, "loss": 0.0297, "step": 32944 }, { "epoch": 0.53, "learning_rate": 4.769577827908476e-06, "loss": 0.0312, "step": 33060 }, { "epoch": 0.53, "eval_loss": 0.1312318742275238, "eval_runtime": 783.4807, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.274, "eval_wer": 10.019565441252189, "step": 33060 }, { "epoch": 0.53, "learning_rate": 4.75088623912343e-06, "loss": 0.0305, "step": 33176 }, { "epoch": 0.53, "learning_rate": 4.732194650338383e-06, "loss": 0.0306, "step": 33292 }, { "epoch": 0.53, "learning_rate": 4.713503061553336e-06, "loss": 0.0297, "step": 33408 }, { "epoch": 0.54, "learning_rate": 4.694811472768289e-06, "loss": 0.0287, "step": 33524 }, { "epoch": 0.54, "learning_rate": 4.676119883983242e-06, "loss": 0.0316, "step": 33640 }, { "epoch": 0.54, "eval_loss": 0.12888002395629883, "eval_runtime": 787.0374, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 9.298733395118937, "step": 33640 }, { "epoch": 0.54, "learning_rate": 4.657428295198196e-06, "loss": 0.0299, "step": 33756 }, { "epoch": 0.54, "learning_rate": 4.638736706413149e-06, "loss": 0.0305, "step": 33872 }, { "epoch": 0.54, "learning_rate": 4.620045117628103e-06, "loss": 0.0312, "step": 33988 }, { "epoch": 0.54, "learning_rate": 4.601353528843056e-06, "loss": 0.0294, "step": 34104 }, { "epoch": 0.55, "learning_rate": 4.582661940058009e-06, "loss": 0.0318, "step": 34220 }, { "epoch": 0.55, "eval_loss": 0.12854613363742828, "eval_runtime": 782.7642, "eval_samples_per_second": 2.19, "eval_steps_per_second": 0.275, "eval_wer": 9.309030995777984, "step": 34220 }, { "epoch": 0.55, "learning_rate": 4.563970351272962e-06, "loss": 0.0315, "step": 34336 }, { "epoch": 0.55, "learning_rate": 4.545278762487915e-06, "loss": 0.0299, "step": 34452 }, { "epoch": 0.55, "learning_rate": 4.526587173702869e-06, "loss": 0.029, "step": 34568 }, { "epoch": 0.55, "learning_rate": 4.507895584917822e-06, "loss": 0.0294, "step": 34684 }, { "epoch": 0.56, "learning_rate": 4.489203996132775e-06, "loss": 0.0305, "step": 34800 }, { "epoch": 0.56, "eval_loss": 0.12825001776218414, "eval_runtime": 786.5669, "eval_samples_per_second": 2.179, "eval_steps_per_second": 0.273, "eval_wer": 9.422304603027493, "step": 34800 }, { "epoch": 0.56, "learning_rate": 4.470512407347729e-06, "loss": 0.0278, "step": 34916 }, { "epoch": 0.56, "learning_rate": 4.451820818562682e-06, "loss": 0.0287, "step": 35032 }, { "epoch": 0.56, "learning_rate": 4.4331292297776355e-06, "loss": 0.0268, "step": 35148 }, { "epoch": 0.56, "learning_rate": 4.4144376409925886e-06, "loss": 0.0275, "step": 35264 }, { "epoch": 0.56, "learning_rate": 4.395746052207542e-06, "loss": 0.0279, "step": 35380 }, { "epoch": 0.56, "eval_loss": 0.12600964307785034, "eval_runtime": 794.742, "eval_samples_per_second": 2.157, "eval_steps_per_second": 0.271, "eval_wer": 9.14426938523324, "step": 35380 }, { "epoch": 0.57, "learning_rate": 4.377054463422495e-06, "loss": 0.0286, "step": 35496 }, { "epoch": 0.57, "learning_rate": 4.358362874637448e-06, "loss": 0.0276, "step": 35612 }, { "epoch": 0.57, "learning_rate": 4.3396712858524015e-06, "loss": 0.0314, "step": 35728 }, { "epoch": 0.57, "learning_rate": 4.3209796970673545e-06, "loss": 0.0336, "step": 35844 }, { "epoch": 0.57, "learning_rate": 4.3022881082823075e-06, "loss": 0.0253, "step": 35960 }, { "epoch": 0.57, "eval_loss": 0.12685632705688477, "eval_runtime": 785.8668, "eval_samples_per_second": 2.181, "eval_steps_per_second": 0.274, "eval_wer": 9.49438780764082, "step": 35960 }, { "epoch": 0.58, "learning_rate": 4.283596519497261e-06, "loss": 0.0276, "step": 36076 }, { "epoch": 0.58, "learning_rate": 4.2649049307122144e-06, "loss": 0.0283, "step": 36192 }, { "epoch": 0.58, "learning_rate": 4.2462133419271675e-06, "loss": 0.0313, "step": 36308 }, { "epoch": 0.58, "learning_rate": 4.2275217531421205e-06, "loss": 0.0281, "step": 36424 }, { "epoch": 0.58, "learning_rate": 4.2088301643570735e-06, "loss": 0.026, "step": 36540 }, { "epoch": 0.58, "eval_loss": 0.1278238743543625, "eval_runtime": 782.8112, "eval_samples_per_second": 2.19, "eval_steps_per_second": 0.275, "eval_wer": 9.597363814231285, "step": 36540 }, { "epoch": 0.59, "learning_rate": 4.190138575572027e-06, "loss": 0.0267, "step": 36656 }, { "epoch": 0.59, "learning_rate": 4.17144698678698e-06, "loss": 0.0237, "step": 36772 }, { "epoch": 0.59, "learning_rate": 4.152755398001934e-06, "loss": 0.0281, "step": 36888 }, { "epoch": 0.59, "learning_rate": 4.134063809216887e-06, "loss": 0.0287, "step": 37004 }, { "epoch": 0.59, "learning_rate": 4.11537222043184e-06, "loss": 0.0309, "step": 37120 }, { "epoch": 0.59, "eval_loss": 0.12141475081443787, "eval_runtime": 786.3376, "eval_samples_per_second": 2.18, "eval_steps_per_second": 0.273, "eval_wer": 9.257542992482753, "step": 37120 }, { "epoch": 0.59, "learning_rate": 4.096680631646794e-06, "loss": 0.0282, "step": 37236 }, { "epoch": 0.6, "learning_rate": 4.077989042861747e-06, "loss": 0.0274, "step": 37352 }, { "epoch": 0.6, "learning_rate": 4.0592974540767e-06, "loss": 0.0267, "step": 37468 }, { "epoch": 0.6, "learning_rate": 4.040605865291653e-06, "loss": 0.0277, "step": 37584 }, { "epoch": 0.6, "learning_rate": 4.021914276506606e-06, "loss": 0.0264, "step": 37700 }, { "epoch": 0.6, "eval_loss": 0.12097407132387161, "eval_runtime": 784.7681, "eval_samples_per_second": 2.184, "eval_steps_per_second": 0.274, "eval_wer": 9.113376583256102, "step": 37700 }, { "epoch": 0.6, "learning_rate": 4.00322268772156e-06, "loss": 0.0274, "step": 37816 }, { "epoch": 0.61, "learning_rate": 3.984531098936513e-06, "loss": 0.0274, "step": 37932 }, { "epoch": 0.61, "learning_rate": 3.965839510151467e-06, "loss": 0.0272, "step": 38048 }, { "epoch": 0.61, "learning_rate": 3.94714792136642e-06, "loss": 0.0247, "step": 38164 }, { "epoch": 0.61, "learning_rate": 3.928456332581373e-06, "loss": 0.0267, "step": 38280 }, { "epoch": 0.61, "eval_loss": 0.11954796314239502, "eval_runtime": 787.0916, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 8.691174956235196, "step": 38280 }, { "epoch": 0.61, "learning_rate": 3.909764743796327e-06, "loss": 0.0258, "step": 38396 }, { "epoch": 0.61, "learning_rate": 3.89107315501128e-06, "loss": 0.027, "step": 38512 }, { "epoch": 0.62, "learning_rate": 3.872381566226233e-06, "loss": 0.0218, "step": 38628 }, { "epoch": 0.62, "learning_rate": 3.853689977441186e-06, "loss": 0.0271, "step": 38744 }, { "epoch": 0.62, "learning_rate": 3.834998388656139e-06, "loss": 0.0265, "step": 38860 }, { "epoch": 0.62, "eval_loss": 0.122675821185112, "eval_runtime": 782.1019, "eval_samples_per_second": 2.192, "eval_steps_per_second": 0.275, "eval_wer": 8.938317372052312, "step": 38860 }, { "epoch": 0.62, "learning_rate": 3.816306799871093e-06, "loss": 0.0262, "step": 38976 }, { "epoch": 0.62, "learning_rate": 3.797615211086046e-06, "loss": 0.0247, "step": 39092 }, { "epoch": 0.63, "learning_rate": 3.7789236223009994e-06, "loss": 0.0244, "step": 39208 }, { "epoch": 0.63, "learning_rate": 3.7602320335159524e-06, "loss": 0.0238, "step": 39324 }, { "epoch": 0.63, "learning_rate": 3.7415404447309054e-06, "loss": 0.0249, "step": 39440 }, { "epoch": 0.63, "eval_loss": 0.12250470370054245, "eval_runtime": 786.0666, "eval_samples_per_second": 2.18, "eval_steps_per_second": 0.274, "eval_wer": 9.020698177324684, "step": 39440 }, { "epoch": 0.63, "learning_rate": 3.7228488559458593e-06, "loss": 0.0212, "step": 39556 }, { "epoch": 0.63, "learning_rate": 3.7041572671608123e-06, "loss": 0.0245, "step": 39672 }, { "epoch": 0.64, "learning_rate": 3.6854656783757658e-06, "loss": 0.0248, "step": 39788 }, { "epoch": 0.64, "learning_rate": 3.6667740895907188e-06, "loss": 0.0246, "step": 39904 }, { "epoch": 0.64, "learning_rate": 3.6480825008056726e-06, "loss": 0.0243, "step": 40020 }, { "epoch": 0.64, "eval_loss": 0.11990202963352203, "eval_runtime": 783.4725, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.274, "eval_wer": 8.608794150962826, "step": 40020 }, { "epoch": 0.64, "learning_rate": 3.6293909120206257e-06, "loss": 0.0254, "step": 40136 }, { "epoch": 0.64, "learning_rate": 3.6106993232355787e-06, "loss": 0.0232, "step": 40252 }, { "epoch": 0.64, "learning_rate": 3.592007734450532e-06, "loss": 0.0228, "step": 40368 }, { "epoch": 0.65, "learning_rate": 3.573316145665485e-06, "loss": 0.0235, "step": 40484 }, { "epoch": 0.65, "learning_rate": 3.554624556880439e-06, "loss": 0.028, "step": 40600 }, { "epoch": 0.65, "eval_loss": 0.11790936440229416, "eval_runtime": 795.6896, "eval_samples_per_second": 2.154, "eval_steps_per_second": 0.27, "eval_wer": 8.722067758212336, "step": 40600 }, { "epoch": 0.65, "learning_rate": 3.535932968095392e-06, "loss": 0.0245, "step": 40716 }, { "epoch": 0.65, "learning_rate": 3.517241379310345e-06, "loss": 0.0244, "step": 40832 }, { "epoch": 0.65, "learning_rate": 3.4985497905252985e-06, "loss": 0.0248, "step": 40948 }, { "epoch": 0.66, "learning_rate": 3.4798582017402515e-06, "loss": 0.0247, "step": 41064 }, { "epoch": 0.66, "learning_rate": 3.461166612955205e-06, "loss": 0.0237, "step": 41180 }, { "epoch": 0.66, "eval_loss": 0.1159593015909195, "eval_runtime": 782.6245, "eval_samples_per_second": 2.19, "eval_steps_per_second": 0.275, "eval_wer": 8.752960560189475, "step": 41180 }, { "epoch": 0.66, "learning_rate": 3.442475024170158e-06, "loss": 0.0271, "step": 41296 }, { "epoch": 0.66, "learning_rate": 3.4237834353851115e-06, "loss": 0.0243, "step": 41412 }, { "epoch": 0.66, "learning_rate": 3.405091846600065e-06, "loss": 0.0239, "step": 41528 }, { "epoch": 0.66, "learning_rate": 3.386400257815018e-06, "loss": 0.0262, "step": 41644 }, { "epoch": 0.67, "learning_rate": 3.3677086690299714e-06, "loss": 0.025, "step": 41760 }, { "epoch": 0.67, "eval_loss": 0.11735337227582932, "eval_runtime": 784.0887, "eval_samples_per_second": 2.186, "eval_steps_per_second": 0.274, "eval_wer": 9.010400576665637, "step": 41760 }, { "epoch": 0.67, "learning_rate": 3.3490170802449244e-06, "loss": 0.0267, "step": 41876 }, { "epoch": 0.67, "learning_rate": 3.3303254914598774e-06, "loss": 0.0211, "step": 41992 }, { "epoch": 0.67, "learning_rate": 3.3116339026748313e-06, "loss": 0.0213, "step": 42108 }, { "epoch": 0.67, "learning_rate": 3.2929423138897843e-06, "loss": 0.0229, "step": 42224 }, { "epoch": 0.68, "learning_rate": 3.2742507251047378e-06, "loss": 0.0222, "step": 42340 }, { "epoch": 0.68, "eval_loss": 0.12385321408510208, "eval_runtime": 782.3158, "eval_samples_per_second": 2.191, "eval_steps_per_second": 0.275, "eval_wer": 8.794150962825661, "step": 42340 }, { "epoch": 0.68, "learning_rate": 3.2555591363196908e-06, "loss": 0.0217, "step": 42456 }, { "epoch": 0.68, "learning_rate": 3.236867547534644e-06, "loss": 0.0214, "step": 42572 }, { "epoch": 0.68, "learning_rate": 3.2181759587495977e-06, "loss": 0.0245, "step": 42688 }, { "epoch": 0.68, "learning_rate": 3.1994843699645507e-06, "loss": 0.0247, "step": 42804 }, { "epoch": 0.69, "learning_rate": 3.180792781179504e-06, "loss": 0.0231, "step": 42920 }, { "epoch": 0.69, "eval_loss": 0.11846602708101273, "eval_runtime": 793.246, "eval_samples_per_second": 2.161, "eval_steps_per_second": 0.271, "eval_wer": 8.299866131191433, "step": 42920 }, { "epoch": 0.69, "learning_rate": 3.162101192394457e-06, "loss": 0.0237, "step": 43036 }, { "epoch": 0.69, "learning_rate": 3.14340960360941e-06, "loss": 0.0255, "step": 43152 }, { "epoch": 0.69, "learning_rate": 3.124718014824364e-06, "loss": 0.0229, "step": 43268 }, { "epoch": 0.69, "learning_rate": 3.106026426039317e-06, "loss": 0.0255, "step": 43384 }, { "epoch": 0.69, "learning_rate": 3.0873348372542705e-06, "loss": 0.0241, "step": 43500 }, { "epoch": 0.69, "eval_loss": 0.11603421717882156, "eval_runtime": 782.7894, "eval_samples_per_second": 2.19, "eval_steps_per_second": 0.275, "eval_wer": 8.474925342395222, "step": 43500 }, { "epoch": 0.7, "learning_rate": 3.0686432484692235e-06, "loss": 0.023, "step": 43616 }, { "epoch": 0.7, "learning_rate": 3.0499516596841766e-06, "loss": 0.0206, "step": 43732 }, { "epoch": 0.7, "learning_rate": 3.03126007089913e-06, "loss": 0.0214, "step": 43848 }, { "epoch": 0.7, "learning_rate": 3.0125684821140835e-06, "loss": 0.0248, "step": 43964 }, { "epoch": 0.7, "learning_rate": 2.993876893329037e-06, "loss": 0.0222, "step": 44080 }, { "epoch": 0.7, "eval_loss": 0.11518887430429459, "eval_runtime": 778.913, "eval_samples_per_second": 2.201, "eval_steps_per_second": 0.276, "eval_wer": 8.382246936463805, "step": 44080 }, { "epoch": 0.71, "learning_rate": 2.97518530454399e-06, "loss": 0.026, "step": 44196 }, { "epoch": 0.71, "learning_rate": 2.956493715758943e-06, "loss": 0.0238, "step": 44312 }, { "epoch": 0.71, "learning_rate": 2.9378021269738964e-06, "loss": 0.0225, "step": 44428 }, { "epoch": 0.71, "learning_rate": 2.9191105381888494e-06, "loss": 0.0223, "step": 44544 }, { "epoch": 0.71, "learning_rate": 2.9004189494038033e-06, "loss": 0.0217, "step": 44660 }, { "epoch": 0.71, "eval_loss": 0.11238180845975876, "eval_runtime": 790.4158, "eval_samples_per_second": 2.168, "eval_steps_per_second": 0.272, "eval_wer": 8.629389352280919, "step": 44660 }, { "epoch": 0.71, "learning_rate": 2.8817273606187563e-06, "loss": 0.022, "step": 44776 }, { "epoch": 0.72, "learning_rate": 2.8630357718337093e-06, "loss": 0.0229, "step": 44892 }, { "epoch": 0.72, "learning_rate": 2.8443441830486628e-06, "loss": 0.0195, "step": 45008 }, { "epoch": 0.72, "learning_rate": 2.825652594263616e-06, "loss": 0.0198, "step": 45124 }, { "epoch": 0.72, "learning_rate": 2.8069610054785697e-06, "loss": 0.0212, "step": 45240 }, { "epoch": 0.72, "eval_loss": 0.11343366652727127, "eval_runtime": 788.7719, "eval_samples_per_second": 2.173, "eval_steps_per_second": 0.273, "eval_wer": 8.299866131191433, "step": 45240 }, { "epoch": 0.72, "learning_rate": 2.7882694166935227e-06, "loss": 0.0216, "step": 45356 }, { "epoch": 0.73, "learning_rate": 2.7695778279084757e-06, "loss": 0.0214, "step": 45472 }, { "epoch": 0.73, "learning_rate": 2.750886239123429e-06, "loss": 0.0199, "step": 45588 }, { "epoch": 0.73, "learning_rate": 2.732194650338382e-06, "loss": 0.022, "step": 45704 }, { "epoch": 0.73, "learning_rate": 2.713503061553336e-06, "loss": 0.019, "step": 45820 }, { "epoch": 0.73, "eval_loss": 0.11607277393341064, "eval_runtime": 785.504, "eval_samples_per_second": 2.182, "eval_steps_per_second": 0.274, "eval_wer": 8.196890124600968, "step": 45820 }, { "epoch": 0.73, "learning_rate": 2.694811472768289e-06, "loss": 0.0222, "step": 45936 }, { "epoch": 0.74, "learning_rate": 2.6761198839832425e-06, "loss": 0.0205, "step": 46052 }, { "epoch": 0.74, "learning_rate": 2.6574282951981955e-06, "loss": 0.0217, "step": 46168 }, { "epoch": 0.74, "learning_rate": 2.6387367064131486e-06, "loss": 0.0222, "step": 46284 }, { "epoch": 0.74, "learning_rate": 2.620045117628102e-06, "loss": 0.0198, "step": 46400 }, { "epoch": 0.74, "eval_loss": 0.11315659433603287, "eval_runtime": 783.5623, "eval_samples_per_second": 2.187, "eval_steps_per_second": 0.274, "eval_wer": 8.413139738440943, "step": 46400 }, { "epoch": 0.74, "learning_rate": 2.6013535288430555e-06, "loss": 0.0225, "step": 46516 }, { "epoch": 0.74, "learning_rate": 2.582661940058009e-06, "loss": 0.0234, "step": 46632 }, { "epoch": 0.75, "learning_rate": 2.563970351272962e-06, "loss": 0.0225, "step": 46748 }, { "epoch": 0.75, "learning_rate": 2.545278762487915e-06, "loss": 0.0194, "step": 46864 }, { "epoch": 0.75, "learning_rate": 2.5265871737028684e-06, "loss": 0.0239, "step": 46980 }, { "epoch": 0.75, "eval_loss": 0.11144877225160599, "eval_runtime": 793.7955, "eval_samples_per_second": 2.159, "eval_steps_per_second": 0.271, "eval_wer": 8.10421171866955, "step": 46980 }, { "epoch": 0.75, "learning_rate": 2.5078955849178214e-06, "loss": 0.0188, "step": 47096 }, { "epoch": 0.75, "learning_rate": 2.489203996132775e-06, "loss": 0.0215, "step": 47212 }, { "epoch": 0.76, "learning_rate": 2.4705124073477283e-06, "loss": 0.0192, "step": 47328 }, { "epoch": 0.76, "learning_rate": 2.4518208185626818e-06, "loss": 0.0209, "step": 47444 }, { "epoch": 0.76, "learning_rate": 2.4331292297776348e-06, "loss": 0.0195, "step": 47560 }, { "epoch": 0.76, "eval_loss": 0.11171752959489822, "eval_runtime": 787.1503, "eval_samples_per_second": 2.177, "eval_steps_per_second": 0.273, "eval_wer": 8.227782926578108, "step": 47560 }, { "epoch": 0.76, "learning_rate": 2.414437640992588e-06, "loss": 0.0193, "step": 47676 }, { "epoch": 0.76, "learning_rate": 2.3957460522075413e-06, "loss": 0.0177, "step": 47792 }, { "epoch": 0.76, "learning_rate": 2.3770544634224947e-06, "loss": 0.0194, "step": 47908 }, { "epoch": 0.77, "learning_rate": 2.3583628746374477e-06, "loss": 0.0192, "step": 48024 }, { "epoch": 0.77, "learning_rate": 2.339671285852401e-06, "loss": 0.0208, "step": 48140 }, { "epoch": 0.77, "eval_loss": 0.10952310264110565, "eval_runtime": 788.6548, "eval_samples_per_second": 2.173, "eval_steps_per_second": 0.273, "eval_wer": 8.042426114715271, "step": 48140 }, { "epoch": 0.77, "learning_rate": 2.320979697067354e-06, "loss": 0.0197, "step": 48256 }, { "epoch": 0.77, "learning_rate": 2.3022881082823076e-06, "loss": 0.0192, "step": 48372 }, { "epoch": 0.77, "learning_rate": 2.283757653883339e-06, "loss": 0.0178, "step": 48488 }, { "epoch": 0.78, "learning_rate": 2.2650660650982924e-06, "loss": 0.0186, "step": 48604 }, { "epoch": 0.78, "learning_rate": 2.2463744763132454e-06, "loss": 0.0201, "step": 48720 }, { "epoch": 0.78, "eval_loss": 0.10950493812561035, "eval_runtime": 783.1853, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.275, "eval_wer": 7.867366903511481, "step": 48720 }, { "epoch": 0.78, "learning_rate": 2.2276828875281985e-06, "loss": 0.0191, "step": 48836 }, { "epoch": 0.78, "learning_rate": 2.208991298743152e-06, "loss": 0.0194, "step": 48952 }, { "epoch": 0.78, "learning_rate": 2.1902997099581054e-06, "loss": 0.0204, "step": 49068 }, { "epoch": 0.79, "learning_rate": 2.1716081211730584e-06, "loss": 0.0205, "step": 49184 }, { "epoch": 0.79, "learning_rate": 2.152916532388012e-06, "loss": 0.0191, "step": 49300 }, { "epoch": 0.79, "eval_loss": 0.11016014218330383, "eval_runtime": 787.2967, "eval_samples_per_second": 2.177, "eval_steps_per_second": 0.273, "eval_wer": 8.052723715374317, "step": 49300 }, { "epoch": 0.79, "learning_rate": 2.134224943602965e-06, "loss": 0.0186, "step": 49416 }, { "epoch": 0.79, "learning_rate": 2.1155333548179183e-06, "loss": 0.0177, "step": 49532 }, { "epoch": 0.79, "learning_rate": 2.0968417660328717e-06, "loss": 0.019, "step": 49648 }, { "epoch": 0.79, "learning_rate": 2.0781501772478248e-06, "loss": 0.02, "step": 49764 }, { "epoch": 0.8, "learning_rate": 2.059458588462778e-06, "loss": 0.0192, "step": 49880 }, { "epoch": 0.8, "eval_loss": 0.10825244337320328, "eval_runtime": 784.2924, "eval_samples_per_second": 2.185, "eval_steps_per_second": 0.274, "eval_wer": 7.908557306147667, "step": 49880 }, { "epoch": 0.8, "learning_rate": 2.0407669996777312e-06, "loss": 0.0223, "step": 49996 }, { "epoch": 0.8, "learning_rate": 2.0220754108926847e-06, "loss": 0.0178, "step": 50112 }, { "epoch": 0.8, "learning_rate": 2.003383822107638e-06, "loss": 0.017, "step": 50228 }, { "epoch": 0.8, "learning_rate": 1.984692233322591e-06, "loss": 0.0175, "step": 50344 }, { "epoch": 0.81, "learning_rate": 1.9661617789236225e-06, "loss": 0.0201, "step": 50460 }, { "epoch": 0.81, "eval_loss": 0.10784495621919632, "eval_runtime": 782.5124, "eval_samples_per_second": 2.19, "eval_steps_per_second": 0.275, "eval_wer": 7.805581299557203, "step": 50460 }, { "epoch": 0.81, "learning_rate": 1.9474701901385755e-06, "loss": 0.0184, "step": 50576 }, { "epoch": 0.81, "learning_rate": 1.928778601353529e-06, "loss": 0.0175, "step": 50692 }, { "epoch": 0.81, "learning_rate": 1.9100870125684824e-06, "loss": 0.0191, "step": 50808 }, { "epoch": 0.81, "learning_rate": 1.8913954237834356e-06, "loss": 0.0169, "step": 50924 }, { "epoch": 0.81, "learning_rate": 1.8727038349983889e-06, "loss": 0.0186, "step": 51040 }, { "epoch": 0.81, "eval_loss": 0.10628069937229156, "eval_runtime": 794.0691, "eval_samples_per_second": 2.159, "eval_steps_per_second": 0.271, "eval_wer": 7.527546081762949, "step": 51040 }, { "epoch": 0.82, "learning_rate": 1.8540122462133419e-06, "loss": 0.0172, "step": 51156 }, { "epoch": 0.82, "learning_rate": 1.8353206574282953e-06, "loss": 0.0182, "step": 51272 }, { "epoch": 0.82, "learning_rate": 1.8166290686432486e-06, "loss": 0.0175, "step": 51388 }, { "epoch": 0.82, "learning_rate": 1.797937479858202e-06, "loss": 0.0181, "step": 51504 }, { "epoch": 0.82, "learning_rate": 1.7792458910731552e-06, "loss": 0.0172, "step": 51620 }, { "epoch": 0.82, "eval_loss": 0.10713626444339752, "eval_runtime": 787.3756, "eval_samples_per_second": 2.177, "eval_steps_per_second": 0.273, "eval_wer": 7.898259705488621, "step": 51620 }, { "epoch": 0.83, "learning_rate": 1.7605543022881083e-06, "loss": 0.0165, "step": 51736 }, { "epoch": 0.83, "learning_rate": 1.7418627135030617e-06, "loss": 0.0189, "step": 51852 }, { "epoch": 0.83, "learning_rate": 1.723171124718015e-06, "loss": 0.0182, "step": 51968 }, { "epoch": 0.83, "learning_rate": 1.7044795359329682e-06, "loss": 0.0171, "step": 52084 }, { "epoch": 0.83, "learning_rate": 1.6857879471479216e-06, "loss": 0.0163, "step": 52200 }, { "epoch": 0.83, "eval_loss": 0.10758110135793686, "eval_runtime": 786.0519, "eval_samples_per_second": 2.181, "eval_steps_per_second": 0.274, "eval_wer": 7.64081968901246, "step": 52200 }, { "epoch": 0.84, "learning_rate": 1.6670963583628746e-06, "loss": 0.018, "step": 52316 }, { "epoch": 0.84, "learning_rate": 1.6484047695778279e-06, "loss": 0.0169, "step": 52432 }, { "epoch": 0.84, "learning_rate": 1.6297131807927813e-06, "loss": 0.0172, "step": 52548 }, { "epoch": 0.84, "learning_rate": 1.6110215920077346e-06, "loss": 0.0146, "step": 52664 }, { "epoch": 0.84, "learning_rate": 1.592330003222688e-06, "loss": 0.0161, "step": 52780 }, { "epoch": 0.84, "eval_loss": 0.10708160698413849, "eval_runtime": 786.9909, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 7.92915250746576, "step": 52780 }, { "epoch": 0.84, "learning_rate": 1.5736384144376412e-06, "loss": 0.0168, "step": 52896 }, { "epoch": 0.85, "learning_rate": 1.5551079600386724e-06, "loss": 0.0166, "step": 53012 }, { "epoch": 0.85, "learning_rate": 1.5364163712536256e-06, "loss": 0.0162, "step": 53128 }, { "epoch": 0.85, "learning_rate": 1.517724782468579e-06, "loss": 0.0176, "step": 53244 }, { "epoch": 0.85, "learning_rate": 1.4990331936835323e-06, "loss": 0.0189, "step": 53360 }, { "epoch": 0.85, "eval_loss": 0.1049240455031395, "eval_runtime": 784.1212, "eval_samples_per_second": 2.186, "eval_steps_per_second": 0.274, "eval_wer": 7.589331685717228, "step": 53360 }, { "epoch": 0.85, "learning_rate": 1.4803416048984853e-06, "loss": 0.0152, "step": 53476 }, { "epoch": 0.86, "learning_rate": 1.4616500161134387e-06, "loss": 0.0193, "step": 53592 }, { "epoch": 0.86, "learning_rate": 1.442958427328392e-06, "loss": 0.0165, "step": 53708 }, { "epoch": 0.86, "learning_rate": 1.4242668385433452e-06, "loss": 0.0175, "step": 53824 }, { "epoch": 0.86, "learning_rate": 1.4055752497582987e-06, "loss": 0.0167, "step": 53940 }, { "epoch": 0.86, "eval_loss": 0.10481404513120651, "eval_runtime": 784.2538, "eval_samples_per_second": 2.186, "eval_steps_per_second": 0.274, "eval_wer": 7.568736484399135, "step": 53940 }, { "epoch": 0.86, "learning_rate": 1.386883660973252e-06, "loss": 0.0138, "step": 54056 }, { "epoch": 0.86, "learning_rate": 1.368192072188205e-06, "loss": 0.0183, "step": 54172 }, { "epoch": 0.87, "learning_rate": 1.3495004834031584e-06, "loss": 0.0158, "step": 54288 }, { "epoch": 0.87, "learning_rate": 1.3308088946181116e-06, "loss": 0.018, "step": 54404 }, { "epoch": 0.87, "learning_rate": 1.312117305833065e-06, "loss": 0.016, "step": 54520 }, { "epoch": 0.87, "eval_loss": 0.10257178544998169, "eval_runtime": 779.3466, "eval_samples_per_second": 2.199, "eval_steps_per_second": 0.276, "eval_wer": 7.537843682421997, "step": 54520 }, { "epoch": 0.87, "learning_rate": 1.2934257170480183e-06, "loss": 0.0182, "step": 54636 }, { "epoch": 0.87, "learning_rate": 1.2747341282629713e-06, "loss": 0.0167, "step": 54752 }, { "epoch": 0.88, "learning_rate": 1.2560425394779247e-06, "loss": 0.0182, "step": 54868 }, { "epoch": 0.88, "learning_rate": 1.237350950692878e-06, "loss": 0.0173, "step": 54984 }, { "epoch": 0.88, "learning_rate": 1.2186593619078312e-06, "loss": 0.016, "step": 55100 }, { "epoch": 0.88, "eval_loss": 0.1033649742603302, "eval_runtime": 792.3552, "eval_samples_per_second": 2.163, "eval_steps_per_second": 0.271, "eval_wer": 7.403974873854392, "step": 55100 }, { "epoch": 0.88, "learning_rate": 1.1999677731227845e-06, "loss": 0.0169, "step": 55216 }, { "epoch": 0.88, "learning_rate": 1.181276184337738e-06, "loss": 0.0151, "step": 55332 }, { "epoch": 0.89, "learning_rate": 1.162584595552691e-06, "loss": 0.0167, "step": 55448 }, { "epoch": 0.89, "learning_rate": 1.1438930067676444e-06, "loss": 0.015, "step": 55564 }, { "epoch": 0.89, "learning_rate": 1.1252014179825976e-06, "loss": 0.015, "step": 55680 }, { "epoch": 0.89, "eval_loss": 0.10430463403463364, "eval_runtime": 785.9691, "eval_samples_per_second": 2.181, "eval_steps_per_second": 0.274, "eval_wer": 7.424570075172484, "step": 55680 }, { "epoch": 0.89, "learning_rate": 1.1065098291975508e-06, "loss": 0.0183, "step": 55796 }, { "epoch": 0.89, "learning_rate": 1.087818240412504e-06, "loss": 0.0173, "step": 55912 }, { "epoch": 0.89, "learning_rate": 1.0691266516274573e-06, "loss": 0.015, "step": 56028 }, { "epoch": 0.9, "learning_rate": 1.0504350628424108e-06, "loss": 0.0156, "step": 56144 }, { "epoch": 0.9, "learning_rate": 1.031743474057364e-06, "loss": 0.0163, "step": 56260 }, { "epoch": 0.9, "eval_loss": 0.10278747975826263, "eval_runtime": 787.108, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 7.5481412830810415, "step": 56260 }, { "epoch": 0.9, "learning_rate": 1.0130518852723172e-06, "loss": 0.0176, "step": 56376 }, { "epoch": 0.9, "learning_rate": 9.943602964872705e-07, "loss": 0.0157, "step": 56492 }, { "epoch": 0.9, "learning_rate": 9.75668707702224e-07, "loss": 0.0138, "step": 56608 }, { "epoch": 0.91, "learning_rate": 9.56977118917177e-07, "loss": 0.015, "step": 56724 }, { "epoch": 0.91, "learning_rate": 9.382855301321303e-07, "loss": 0.0154, "step": 56840 }, { "epoch": 0.91, "eval_loss": 0.1029290109872818, "eval_runtime": 783.9469, "eval_samples_per_second": 2.186, "eval_steps_per_second": 0.274, "eval_wer": 7.455462877149624, "step": 56840 }, { "epoch": 0.91, "learning_rate": 9.197550757331615e-07, "loss": 0.017, "step": 56956 }, { "epoch": 0.91, "learning_rate": 9.010634869481148e-07, "loss": 0.0156, "step": 57072 }, { "epoch": 0.91, "learning_rate": 8.82371898163068e-07, "loss": 0.016, "step": 57188 }, { "epoch": 0.91, "learning_rate": 8.636803093780213e-07, "loss": 0.0144, "step": 57304 }, { "epoch": 0.92, "learning_rate": 8.449887205929746e-07, "loss": 0.0176, "step": 57420 }, { "epoch": 0.92, "eval_loss": 0.1018747016787529, "eval_runtime": 783.2187, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.275, "eval_wer": 7.49665327978581, "step": 57420 }, { "epoch": 0.92, "learning_rate": 8.262971318079279e-07, "loss": 0.018, "step": 57536 }, { "epoch": 0.92, "learning_rate": 8.076055430228811e-07, "loss": 0.0162, "step": 57652 }, { "epoch": 0.92, "learning_rate": 7.889139542378344e-07, "loss": 0.0146, "step": 57768 }, { "epoch": 0.92, "learning_rate": 7.702223654527877e-07, "loss": 0.0133, "step": 57884 }, { "epoch": 0.93, "learning_rate": 7.51530776667741e-07, "loss": 0.0167, "step": 58000 }, { "epoch": 0.93, "eval_loss": 0.10182846337556839, "eval_runtime": 789.1997, "eval_samples_per_second": 2.172, "eval_steps_per_second": 0.272, "eval_wer": 7.445165276490577, "step": 58000 }, { "epoch": 0.93, "learning_rate": 7.328391878826942e-07, "loss": 0.0164, "step": 58116 }, { "epoch": 0.93, "learning_rate": 7.141475990976475e-07, "loss": 0.0151, "step": 58232 }, { "epoch": 0.93, "learning_rate": 6.954560103126008e-07, "loss": 0.0136, "step": 58348 }, { "epoch": 0.93, "learning_rate": 6.76764421527554e-07, "loss": 0.0136, "step": 58464 }, { "epoch": 0.94, "learning_rate": 6.580728327425073e-07, "loss": 0.0163, "step": 58580 }, { "epoch": 0.94, "eval_loss": 0.1013648584485054, "eval_runtime": 785.0865, "eval_samples_per_second": 2.183, "eval_steps_per_second": 0.274, "eval_wer": 7.445165276490577, "step": 58580 }, { "epoch": 0.94, "learning_rate": 6.393812439574605e-07, "loss": 0.014, "step": 58696 }, { "epoch": 0.94, "learning_rate": 6.206896551724139e-07, "loss": 0.0144, "step": 58812 }, { "epoch": 0.94, "learning_rate": 6.019980663873671e-07, "loss": 0.0147, "step": 58928 }, { "epoch": 0.94, "learning_rate": 5.834676119883983e-07, "loss": 0.013, "step": 59044 }, { "epoch": 0.94, "learning_rate": 5.647760232033517e-07, "loss": 0.0132, "step": 59160 }, { "epoch": 0.94, "eval_loss": 0.10131796449422836, "eval_runtime": 782.9945, "eval_samples_per_second": 2.189, "eval_steps_per_second": 0.275, "eval_wer": 7.136237256719184, "step": 59160 }, { "epoch": 0.95, "learning_rate": 5.460844344183049e-07, "loss": 0.0125, "step": 59276 }, { "epoch": 0.95, "learning_rate": 5.273928456332581e-07, "loss": 0.0118, "step": 59392 }, { "epoch": 0.95, "learning_rate": 5.087012568482115e-07, "loss": 0.0156, "step": 59508 }, { "epoch": 0.95, "learning_rate": 4.900096680631647e-07, "loss": 0.0164, "step": 59624 }, { "epoch": 0.95, "learning_rate": 4.7147921366419595e-07, "loss": 0.0143, "step": 59740 }, { "epoch": 0.95, "eval_loss": 0.10116977989673615, "eval_runtime": 781.1213, "eval_samples_per_second": 2.194, "eval_steps_per_second": 0.275, "eval_wer": 7.383379672536298, "step": 59740 }, { "epoch": 0.96, "learning_rate": 4.527876248791493e-07, "loss": 0.0128, "step": 59856 }, { "epoch": 0.96, "learning_rate": 4.340960360941025e-07, "loss": 0.015, "step": 59972 }, { "epoch": 0.96, "learning_rate": 4.1540444730905576e-07, "loss": 0.0134, "step": 60088 }, { "epoch": 0.96, "learning_rate": 3.9671285852400905e-07, "loss": 0.0148, "step": 60204 }, { "epoch": 0.96, "learning_rate": 3.780212697389624e-07, "loss": 0.0159, "step": 60320 }, { "epoch": 0.96, "eval_loss": 0.10068144649267197, "eval_runtime": 785.2541, "eval_samples_per_second": 2.183, "eval_steps_per_second": 0.274, "eval_wer": 7.311296467922975, "step": 60320 }, { "epoch": 0.96, "learning_rate": 3.593296809539156e-07, "loss": 0.0145, "step": 60436 }, { "epoch": 0.97, "learning_rate": 3.4063809216886885e-07, "loss": 0.0149, "step": 60552 }, { "epoch": 0.97, "learning_rate": 3.2194650338382214e-07, "loss": 0.0158, "step": 60668 }, { "epoch": 0.97, "learning_rate": 3.0325491459877543e-07, "loss": 0.0161, "step": 60784 }, { "epoch": 0.97, "learning_rate": 2.8456332581372866e-07, "loss": 0.0127, "step": 60900 }, { "epoch": 0.97, "eval_loss": 0.10017982870340347, "eval_runtime": 783.6716, "eval_samples_per_second": 2.187, "eval_steps_per_second": 0.274, "eval_wer": 7.290701266604881, "step": 60900 }, { "epoch": 0.97, "learning_rate": 2.6587173702868195e-07, "loss": 0.0146, "step": 61016 }, { "epoch": 0.98, "learning_rate": 2.471801482436352e-07, "loss": 0.0155, "step": 61132 }, { "epoch": 0.98, "learning_rate": 2.284885594585885e-07, "loss": 0.0125, "step": 61248 }, { "epoch": 0.98, "learning_rate": 2.0979697067354173e-07, "loss": 0.0161, "step": 61364 }, { "epoch": 0.98, "learning_rate": 1.9110538188849505e-07, "loss": 0.0134, "step": 61480 }, { "epoch": 0.98, "eval_loss": 0.10023297369480133, "eval_runtime": 783.3354, "eval_samples_per_second": 2.188, "eval_steps_per_second": 0.274, "eval_wer": 7.249510863968696, "step": 61480 }, { "epoch": 0.98, "learning_rate": 1.7241379310344828e-07, "loss": 0.0153, "step": 61596 }, { "epoch": 0.99, "learning_rate": 1.5372220431840157e-07, "loss": 0.0154, "step": 61712 }, { "epoch": 0.99, "learning_rate": 1.3503061553335483e-07, "loss": 0.013, "step": 61828 }, { "epoch": 0.99, "learning_rate": 1.163390267483081e-07, "loss": 0.0135, "step": 61944 }, { "epoch": 0.99, "learning_rate": 9.764743796326138e-08, "loss": 0.0147, "step": 62060 }, { "epoch": 0.99, "eval_loss": 0.10011597722768784, "eval_runtime": 799.7654, "eval_samples_per_second": 2.143, "eval_steps_per_second": 0.269, "eval_wer": 7.300998867263927, "step": 62060 }, { "epoch": 0.99, "learning_rate": 7.895584917821464e-08, "loss": 0.0146, "step": 62176 }, { "epoch": 0.99, "learning_rate": 6.02642603931679e-08, "loss": 0.0151, "step": 62292 }, { "epoch": 1.0, "learning_rate": 4.157267160812117e-08, "loss": 0.0136, "step": 62408 }, { "epoch": 1.0, "learning_rate": 2.2881082823074446e-08, "loss": 0.0123, "step": 62524 }, { "epoch": 1.0, "learning_rate": 4.189494038027715e-09, "loss": 0.0144, "step": 62640 }, { "epoch": 1.0, "eval_loss": 0.09998083859682083, "eval_runtime": 786.9298, "eval_samples_per_second": 2.178, "eval_steps_per_second": 0.273, "eval_wer": 7.280403665945835, "step": 62640 }, { "epoch": 1.0, "step": 62640, "total_flos": 2.1279417860837375e+21, "train_loss": 0.051935346220949447, "train_runtime": 527672.5015, "train_samples_per_second": 1.899, "train_steps_per_second": 0.119 } ], "max_steps": 62640, "num_train_epochs": 9223372036854775807, "total_flos": 2.1279417860837375e+21, "trial_name": null, "trial_params": null }