{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 9400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 2.5e-05, "loss": 15.9584, "step": 64 }, { "epoch": 0.14, "eval_loss": 14.658380508422852, "eval_runtime": 249.5052, "eval_samples_per_second": 8.208, "eval_wer": 1.0, "step": 64 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 6.3907, "step": 128 }, { "epoch": 0.27, "eval_loss": 4.082584857940674, "eval_runtime": 247.9481, "eval_samples_per_second": 8.26, "eval_wer": 1.0, "step": 128 }, { "epoch": 0.41, "learning_rate": 7.500000000000001e-05, "loss": 3.4412, "step": 192 }, { "epoch": 0.41, "eval_loss": 3.1572635173797607, "eval_runtime": 251.7843, "eval_samples_per_second": 8.134, "eval_wer": 1.0, "step": 192 }, { "epoch": 0.54, "learning_rate": 0.0001, "loss": 3.005, "step": 256 }, { "epoch": 0.54, "eval_loss": 3.0201895236968994, "eval_runtime": 248.8764, "eval_samples_per_second": 8.229, "eval_wer": 1.0, "step": 256 }, { "epoch": 0.68, "learning_rate": 0.000125, "loss": 2.9507, "step": 320 }, { "epoch": 0.68, "eval_loss": 3.0097341537475586, "eval_runtime": 250.2451, "eval_samples_per_second": 8.184, "eval_wer": 1.0, "step": 320 }, { "epoch": 0.82, "learning_rate": 0.00015000000000000001, "loss": 2.9285, "step": 384 }, { "epoch": 0.82, "eval_loss": 3.0145537853240967, "eval_runtime": 249.391, "eval_samples_per_second": 8.212, "eval_wer": 1.0, "step": 384 }, { "epoch": 0.95, "learning_rate": 0.000175, "loss": 2.9081, "step": 448 }, { "epoch": 0.95, "eval_loss": 2.889194965362549, "eval_runtime": 252.4706, "eval_samples_per_second": 8.112, "eval_wer": 1.0, "step": 448 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 2.8141, "step": 512 }, { "epoch": 1.09, "eval_loss": 3.1128151416778564, "eval_runtime": 249.3837, "eval_samples_per_second": 8.212, "eval_wer": 0.9997997196074504, "step": 512 }, { "epoch": 1.23, "learning_rate": 0.00019855985598559857, "loss": 1.9764, "step": 576 }, { "epoch": 1.23, "eval_loss": 1.4347355365753174, "eval_runtime": 253.6868, "eval_samples_per_second": 8.073, "eval_wer": 0.9202884037652714, "step": 576 }, { "epoch": 1.36, "learning_rate": 0.0001971197119711971, "loss": 1.171, "step": 640 }, { "epoch": 1.36, "eval_loss": 1.1342754364013672, "eval_runtime": 252.7977, "eval_samples_per_second": 8.101, "eval_wer": 0.8585352827291541, "step": 640 }, { "epoch": 1.5, "learning_rate": 0.0001956795679567957, "loss": 0.9986, "step": 704 }, { "epoch": 1.5, "eval_loss": 1.0098124742507935, "eval_runtime": 254.0896, "eval_samples_per_second": 8.06, "eval_wer": 0.8405768075305428, "step": 704 }, { "epoch": 1.63, "learning_rate": 0.00019423942394239426, "loss": 0.8642, "step": 768 }, { "epoch": 1.63, "eval_loss": 0.9640593528747559, "eval_runtime": 254.2048, "eval_samples_per_second": 8.056, "eval_wer": 0.8503905467654717, "step": 768 }, { "epoch": 1.77, "learning_rate": 0.0001927992799279928, "loss": 0.7951, "step": 832 }, { "epoch": 1.77, "eval_loss": 0.8799329996109009, "eval_runtime": 255.0842, "eval_samples_per_second": 8.029, "eval_wer": 0.787903064290006, "step": 832 }, { "epoch": 1.91, "learning_rate": 0.00019135913591359136, "loss": 0.7317, "step": 896 }, { "epoch": 1.91, "eval_loss": 0.8695712089538574, "eval_runtime": 253.1871, "eval_samples_per_second": 8.089, "eval_wer": 0.7686093864743975, "step": 896 }, { "epoch": 2.04, "learning_rate": 0.00018991899189918995, "loss": 0.6835, "step": 960 }, { "epoch": 2.04, "eval_loss": 0.8140479326248169, "eval_runtime": 253.7263, "eval_samples_per_second": 8.072, "eval_wer": 0.7338941184324721, "step": 960 }, { "epoch": 2.18, "learning_rate": 0.00018847884788478848, "loss": 0.6188, "step": 1024 }, { "epoch": 2.18, "eval_loss": 0.8309915065765381, "eval_runtime": 254.0037, "eval_samples_per_second": 8.063, "eval_wer": 0.7473129047332933, "step": 1024 }, { "epoch": 2.31, "learning_rate": 0.00018703870387038705, "loss": 0.6116, "step": 1088 }, { "epoch": 2.31, "eval_loss": 0.8404783010482788, "eval_runtime": 255.6119, "eval_samples_per_second": 8.012, "eval_wer": 0.7376326857600641, "step": 1088 }, { "epoch": 2.45, "learning_rate": 0.0001855985598559856, "loss": 0.5654, "step": 1152 }, { "epoch": 2.45, "eval_loss": 0.7973554134368896, "eval_runtime": 253.93, "eval_samples_per_second": 8.065, "eval_wer": 0.7304893517591294, "step": 1152 }, { "epoch": 2.59, "learning_rate": 0.00018415841584158417, "loss": 0.5697, "step": 1216 }, { "epoch": 2.59, "eval_loss": 0.7916511297225952, "eval_runtime": 255.1499, "eval_samples_per_second": 8.027, "eval_wer": 0.7420388543961546, "step": 1216 }, { "epoch": 2.72, "learning_rate": 0.00018271827182718273, "loss": 0.5621, "step": 1280 }, { "epoch": 2.72, "eval_loss": 0.773869514465332, "eval_runtime": 253.9689, "eval_samples_per_second": 8.064, "eval_wer": 0.7266840243006877, "step": 1280 }, { "epoch": 2.86, "learning_rate": 0.0001812781278127813, "loss": 0.5646, "step": 1344 }, { "epoch": 2.86, "eval_loss": 0.7937338352203369, "eval_runtime": 254.2977, "eval_samples_per_second": 8.054, "eval_wer": 0.70698978569998, "step": 1344 }, { "epoch": 3.0, "learning_rate": 0.00017983798379837983, "loss": 0.5266, "step": 1408 }, { "epoch": 3.0, "eval_loss": 0.738484263420105, "eval_runtime": 251.7487, "eval_samples_per_second": 8.135, "eval_wer": 0.6933707190066093, "step": 1408 }, { "epoch": 3.13, "learning_rate": 0.00017839783978397842, "loss": 0.4719, "step": 1472 }, { "epoch": 3.13, "eval_loss": 0.7587714195251465, "eval_runtime": 253.339, "eval_samples_per_second": 8.084, "eval_wer": 0.6889645503705187, "step": 1472 }, { "epoch": 3.27, "learning_rate": 0.00017695769576957698, "loss": 0.4479, "step": 1536 }, { "epoch": 3.27, "eval_loss": 0.7555954456329346, "eval_runtime": 252.6303, "eval_samples_per_second": 8.107, "eval_wer": 0.6807530542759864, "step": 1536 }, { "epoch": 3.4, "learning_rate": 0.00017551755175517552, "loss": 0.4614, "step": 1600 }, { "epoch": 3.4, "eval_loss": 0.7171066999435425, "eval_runtime": 252.5327, "eval_samples_per_second": 8.11, "eval_wer": 0.6792843313972895, "step": 1600 }, { "epoch": 3.54, "learning_rate": 0.00017407740774077408, "loss": 0.4491, "step": 1664 }, { "epoch": 3.54, "eval_loss": 0.7290804386138916, "eval_runtime": 251.92, "eval_samples_per_second": 8.13, "eval_wer": 0.6693370719006609, "step": 1664 }, { "epoch": 3.68, "learning_rate": 0.00017263726372637264, "loss": 0.4355, "step": 1728 }, { "epoch": 3.68, "eval_loss": 0.7258065938949585, "eval_runtime": 251.8895, "eval_samples_per_second": 8.131, "eval_wer": 0.6802857333600374, "step": 1728 }, { "epoch": 3.81, "learning_rate": 0.0001711971197119712, "loss": 0.4217, "step": 1792 }, { "epoch": 3.81, "eval_loss": 0.7212890982627869, "eval_runtime": 251.4573, "eval_samples_per_second": 8.145, "eval_wer": 0.674878162761199, "step": 1792 }, { "epoch": 3.95, "learning_rate": 0.00016975697569756977, "loss": 0.4466, "step": 1856 }, { "epoch": 3.95, "eval_loss": 0.7866759300231934, "eval_runtime": 252.4271, "eval_samples_per_second": 8.113, "eval_wer": 0.7039855798117365, "step": 1856 }, { "epoch": 4.09, "learning_rate": 0.00016831683168316833, "loss": 0.4019, "step": 1920 }, { "epoch": 4.09, "eval_loss": 0.724478006362915, "eval_runtime": 252.8043, "eval_samples_per_second": 8.101, "eval_wer": 0.6720074771346551, "step": 1920 }, { "epoch": 4.22, "learning_rate": 0.0001668766876687669, "loss": 0.3812, "step": 1984 }, { "epoch": 4.22, "eval_loss": 0.7306143045425415, "eval_runtime": 252.5346, "eval_samples_per_second": 8.11, "eval_wer": 0.671206355564457, "step": 1984 }, { "epoch": 4.36, "learning_rate": 0.00016543654365436546, "loss": 0.3662, "step": 2048 }, { "epoch": 4.36, "eval_loss": 0.7334802150726318, "eval_runtime": 251.9822, "eval_samples_per_second": 8.128, "eval_wer": 0.6639295013018226, "step": 2048 }, { "epoch": 4.49, "learning_rate": 0.000163996399639964, "loss": 0.3684, "step": 2112 }, { "epoch": 4.49, "eval_loss": 0.7250552177429199, "eval_runtime": 251.8402, "eval_samples_per_second": 8.132, "eval_wer": 0.6481073502904066, "step": 2112 }, { "epoch": 4.63, "learning_rate": 0.00016255625562556255, "loss": 0.3809, "step": 2176 }, { "epoch": 4.63, "eval_loss": 0.7295255064964294, "eval_runtime": 251.8173, "eval_samples_per_second": 8.133, "eval_wer": 0.666533146404967, "step": 2176 }, { "epoch": 4.77, "learning_rate": 0.00016111611161116112, "loss": 0.3618, "step": 2240 }, { "epoch": 4.77, "eval_loss": 0.725806474685669, "eval_runtime": 251.7567, "eval_samples_per_second": 8.135, "eval_wer": 0.6467721476734094, "step": 2240 }, { "epoch": 4.9, "learning_rate": 0.00015967596759675968, "loss": 0.3896, "step": 2304 }, { "epoch": 4.9, "eval_loss": 0.6914154291152954, "eval_runtime": 253.233, "eval_samples_per_second": 8.087, "eval_wer": 0.6378262901395286, "step": 2304 }, { "epoch": 5.04, "learning_rate": 0.00015823582358235824, "loss": 0.3381, "step": 2368 }, { "epoch": 5.04, "eval_loss": 0.7252967357635498, "eval_runtime": 251.9208, "eval_samples_per_second": 8.13, "eval_wer": 0.6556512450764403, "step": 2368 }, { "epoch": 5.17, "learning_rate": 0.0001567956795679568, "loss": 0.3385, "step": 2432 }, { "epoch": 5.17, "eval_loss": 0.739501953125, "eval_runtime": 251.6871, "eval_samples_per_second": 8.137, "eval_wer": 0.6486414313372054, "step": 2432 }, { "epoch": 5.31, "learning_rate": 0.00015535553555355537, "loss": 0.2991, "step": 2496 }, { "epoch": 5.31, "eval_loss": 0.718348503112793, "eval_runtime": 252.6947, "eval_samples_per_second": 8.105, "eval_wer": 0.6476400293744575, "step": 2496 }, { "epoch": 5.45, "learning_rate": 0.00015391539153915393, "loss": 0.3219, "step": 2560 }, { "epoch": 5.45, "eval_loss": 0.6939400434494019, "eval_runtime": 252.3364, "eval_samples_per_second": 8.116, "eval_wer": 0.6454369450564124, "step": 2560 }, { "epoch": 5.58, "learning_rate": 0.0001524752475247525, "loss": 0.3212, "step": 2624 }, { "epoch": 5.58, "eval_loss": 0.723796010017395, "eval_runtime": 252.2722, "eval_samples_per_second": 8.118, "eval_wer": 0.6450363842713132, "step": 2624 }, { "epoch": 5.72, "learning_rate": 0.00015103510351035103, "loss": 0.3152, "step": 2688 }, { "epoch": 5.72, "eval_loss": 0.7430602312088013, "eval_runtime": 252.488, "eval_samples_per_second": 8.111, "eval_wer": 0.6561853261232392, "step": 2688 }, { "epoch": 5.86, "learning_rate": 0.0001495949594959496, "loss": 0.3359, "step": 2752 }, { "epoch": 5.86, "eval_loss": 0.7211728096008301, "eval_runtime": 252.04, "eval_samples_per_second": 8.126, "eval_wer": 0.6389612123639762, "step": 2752 }, { "epoch": 5.99, "learning_rate": 0.00014815481548154818, "loss": 0.3171, "step": 2816 }, { "epoch": 5.99, "eval_loss": 0.707861602306366, "eval_runtime": 253.0372, "eval_samples_per_second": 8.094, "eval_wer": 0.6495093130382535, "step": 2816 }, { "epoch": 6.13, "learning_rate": 0.00014671467146714671, "loss": 0.2806, "step": 2880 }, { "epoch": 6.13, "eval_loss": 0.6907961368560791, "eval_runtime": 254.422, "eval_samples_per_second": 8.05, "eval_wer": 0.6292142332598972, "step": 2880 }, { "epoch": 6.26, "learning_rate": 0.00014527452745274528, "loss": 0.2765, "step": 2944 }, { "epoch": 6.26, "eval_loss": 0.7019878029823303, "eval_runtime": 252.9804, "eval_samples_per_second": 8.095, "eval_wer": 0.6294145136524467, "step": 2944 }, { "epoch": 6.4, "learning_rate": 0.00014383438343834384, "loss": 0.2854, "step": 3008 }, { "epoch": 6.4, "eval_loss": 0.811292290687561, "eval_runtime": 253.017, "eval_samples_per_second": 8.094, "eval_wer": 0.6774818078643434, "step": 3008 }, { "epoch": 6.54, "learning_rate": 0.0001423942394239424, "loss": 0.2805, "step": 3072 }, { "epoch": 6.54, "eval_loss": 0.7887662649154663, "eval_runtime": 251.6217, "eval_samples_per_second": 8.139, "eval_wer": 0.6378930502703786, "step": 3072 }, { "epoch": 6.67, "learning_rate": 0.00014095409540954096, "loss": 0.2861, "step": 3136 }, { "epoch": 6.67, "eval_loss": 0.7010805606842041, "eval_runtime": 252.2814, "eval_samples_per_second": 8.118, "eval_wer": 0.635356165298084, "step": 3136 }, { "epoch": 6.81, "learning_rate": 0.00013951395139513953, "loss": 0.2903, "step": 3200 }, { "epoch": 6.81, "eval_loss": 0.7183523774147034, "eval_runtime": 253.427, "eval_samples_per_second": 8.081, "eval_wer": 0.6266105881567527, "step": 3200 }, { "epoch": 6.94, "learning_rate": 0.00013807380738073806, "loss": 0.2798, "step": 3264 }, { "epoch": 6.94, "eval_loss": 0.7161312699317932, "eval_runtime": 253.1259, "eval_samples_per_second": 8.091, "eval_wer": 0.6217370986047133, "step": 3264 }, { "epoch": 7.08, "learning_rate": 0.00013663366336633665, "loss": 0.2528, "step": 3328 }, { "epoch": 7.08, "eval_loss": 0.7560293674468994, "eval_runtime": 252.846, "eval_samples_per_second": 8.1, "eval_wer": 0.6506442352627011, "step": 3328 }, { "epoch": 7.22, "learning_rate": 0.00013519351935193521, "loss": 0.2549, "step": 3392 }, { "epoch": 7.22, "eval_loss": 0.7602236270904541, "eval_runtime": 253.2296, "eval_samples_per_second": 8.088, "eval_wer": 0.6505774751318513, "step": 3392 }, { "epoch": 7.35, "learning_rate": 0.00013375337533753375, "loss": 0.2431, "step": 3456 }, { "epoch": 7.35, "eval_loss": 0.6875869035720825, "eval_runtime": 252.6805, "eval_samples_per_second": 8.105, "eval_wer": 0.61859937245477, "step": 3456 }, { "epoch": 7.49, "learning_rate": 0.0001323132313231323, "loss": 0.2506, "step": 3520 }, { "epoch": 7.49, "eval_loss": 0.7143360376358032, "eval_runtime": 253.2779, "eval_samples_per_second": 8.086, "eval_wer": 0.6345550437278857, "step": 3520 }, { "epoch": 7.63, "learning_rate": 0.00013087308730873087, "loss": 0.2497, "step": 3584 }, { "epoch": 7.63, "eval_loss": 0.7228794693946838, "eval_runtime": 252.2511, "eval_samples_per_second": 8.119, "eval_wer": 0.6364910875225316, "step": 3584 }, { "epoch": 7.76, "learning_rate": 0.00012943294329432944, "loss": 0.2492, "step": 3648 }, { "epoch": 7.76, "eval_loss": 0.7050414085388184, "eval_runtime": 253.5665, "eval_samples_per_second": 8.077, "eval_wer": 0.6172641698377729, "step": 3648 }, { "epoch": 7.9, "learning_rate": 0.000127992799279928, "loss": 0.2444, "step": 3712 }, { "epoch": 7.9, "eval_loss": 0.7215788960456848, "eval_runtime": 252.8539, "eval_samples_per_second": 8.1, "eval_wer": 0.6314173175779425, "step": 3712 }, { "epoch": 8.03, "learning_rate": 0.00012655265526552656, "loss": 0.2438, "step": 3776 }, { "epoch": 8.03, "eval_loss": 0.7077196836471558, "eval_runtime": 253.6898, "eval_samples_per_second": 8.073, "eval_wer": 0.6182655718005208, "step": 3776 }, { "epoch": 8.17, "learning_rate": 0.0001251125112511251, "loss": 0.2083, "step": 3840 }, { "epoch": 8.17, "eval_loss": 0.7524451613426208, "eval_runtime": 255.0333, "eval_samples_per_second": 8.03, "eval_wer": 0.6258762267174044, "step": 3840 }, { "epoch": 8.31, "learning_rate": 0.0001236723672367237, "loss": 0.204, "step": 3904 }, { "epoch": 8.31, "eval_loss": 0.7626018524169922, "eval_runtime": 253.5087, "eval_samples_per_second": 8.079, "eval_wer": 0.6330863208491888, "step": 3904 }, { "epoch": 8.44, "learning_rate": 0.00012223222322232225, "loss": 0.2353, "step": 3968 }, { "epoch": 8.44, "eval_loss": 0.7177530527114868, "eval_runtime": 253.4757, "eval_samples_per_second": 8.08, "eval_wer": 0.6127244809399827, "step": 3968 }, { "epoch": 8.58, "learning_rate": 0.0001207920792079208, "loss": 0.2208, "step": 4032 }, { "epoch": 8.58, "eval_loss": 0.7555935382843018, "eval_runtime": 253.7282, "eval_samples_per_second": 8.072, "eval_wer": 0.6294812737832967, "step": 4032 }, { "epoch": 8.71, "learning_rate": 0.00011935193519351935, "loss": 0.2266, "step": 4096 }, { "epoch": 8.71, "eval_loss": 0.7341886162757874, "eval_runtime": 253.8255, "eval_samples_per_second": 8.069, "eval_wer": 0.6294812737832967, "step": 4096 }, { "epoch": 8.85, "learning_rate": 0.00011791179117911792, "loss": 0.2188, "step": 4160 }, { "epoch": 8.85, "eval_loss": 0.7594360709190369, "eval_runtime": 253.012, "eval_samples_per_second": 8.094, "eval_wer": 0.6449028640096135, "step": 4160 }, { "epoch": 8.99, "learning_rate": 0.00011647164716471647, "loss": 0.231, "step": 4224 }, { "epoch": 8.99, "eval_loss": 0.6901054382324219, "eval_runtime": 253.458, "eval_samples_per_second": 8.08, "eval_wer": 0.6077842312570932, "step": 4224 }, { "epoch": 9.12, "learning_rate": 0.00011503150315031504, "loss": 0.2063, "step": 4288 }, { "epoch": 9.12, "eval_loss": 0.7627975344657898, "eval_runtime": 253.6008, "eval_samples_per_second": 8.076, "eval_wer": 0.6116563188463849, "step": 4288 }, { "epoch": 9.26, "learning_rate": 0.00011359135913591358, "loss": 0.1931, "step": 4352 }, { "epoch": 9.26, "eval_loss": 0.7425730228424072, "eval_runtime": 254.3179, "eval_samples_per_second": 8.053, "eval_wer": 0.6149943253888778, "step": 4352 }, { "epoch": 9.4, "learning_rate": 0.00011215121512151216, "loss": 0.2189, "step": 4416 }, { "epoch": 9.4, "eval_loss": 0.7322723269462585, "eval_runtime": 254.9695, "eval_samples_per_second": 8.032, "eval_wer": 0.6066493090326457, "step": 4416 }, { "epoch": 9.53, "learning_rate": 0.00011071107110711072, "loss": 0.1931, "step": 4480 }, { "epoch": 9.53, "eval_loss": 0.7253696918487549, "eval_runtime": 253.0608, "eval_samples_per_second": 8.093, "eval_wer": 0.613525602510181, "step": 4480 }, { "epoch": 9.67, "learning_rate": 0.00010927092709270927, "loss": 0.2017, "step": 4544 }, { "epoch": 9.67, "eval_loss": 0.6984794735908508, "eval_runtime": 253.842, "eval_samples_per_second": 8.068, "eval_wer": 0.6072501502102944, "step": 4544 }, { "epoch": 9.8, "learning_rate": 0.00010783078307830783, "loss": 0.2169, "step": 4608 }, { "epoch": 9.8, "eval_loss": 0.698683500289917, "eval_runtime": 253.2368, "eval_samples_per_second": 8.087, "eval_wer": 0.6078509913879431, "step": 4608 }, { "epoch": 9.94, "learning_rate": 0.00010639063906390641, "loss": 0.1956, "step": 4672 }, { "epoch": 9.94, "eval_loss": 0.7127052545547485, "eval_runtime": 254.4368, "eval_samples_per_second": 8.049, "eval_wer": 0.6073836704719942, "step": 4672 }, { "epoch": 10.08, "learning_rate": 0.00010495049504950496, "loss": 0.1972, "step": 4736 }, { "epoch": 10.08, "eval_loss": 0.7555835247039795, "eval_runtime": 253.4953, "eval_samples_per_second": 8.079, "eval_wer": 0.6094532345283397, "step": 4736 }, { "epoch": 10.21, "learning_rate": 0.00010351035103510351, "loss": 0.1827, "step": 4800 }, { "epoch": 10.21, "eval_loss": 0.7492347359657288, "eval_runtime": 255.7545, "eval_samples_per_second": 8.008, "eval_wer": 0.6165298083984244, "step": 4800 }, { "epoch": 10.35, "learning_rate": 0.00010207020702070207, "loss": 0.1798, "step": 4864 }, { "epoch": 10.35, "eval_loss": 0.7403519749641418, "eval_runtime": 254.7315, "eval_samples_per_second": 8.04, "eval_wer": 0.6058481874624474, "step": 4864 }, { "epoch": 10.49, "learning_rate": 0.00010063006300630065, "loss": 0.1839, "step": 4928 }, { "epoch": 10.49, "eval_loss": 0.7588409185409546, "eval_runtime": 255.5749, "eval_samples_per_second": 8.013, "eval_wer": 0.60931971426664, "step": 4928 }, { "epoch": 10.62, "learning_rate": 9.91899189918992e-05, "loss": 0.1823, "step": 4992 }, { "epoch": 10.62, "eval_loss": 0.7923687696456909, "eval_runtime": 254.6347, "eval_samples_per_second": 8.043, "eval_wer": 0.621403297950464, "step": 4992 }, { "epoch": 10.76, "learning_rate": 9.774977497749776e-05, "loss": 0.1749, "step": 5056 }, { "epoch": 10.76, "eval_loss": 0.7493650913238525, "eval_runtime": 252.6472, "eval_samples_per_second": 8.106, "eval_wer": 0.6101875959676881, "step": 5056 }, { "epoch": 10.89, "learning_rate": 9.630963096309631e-05, "loss": 0.1876, "step": 5120 }, { "epoch": 10.89, "eval_loss": 0.7571142911911011, "eval_runtime": 253.4609, "eval_samples_per_second": 8.08, "eval_wer": 0.6097870351825889, "step": 5120 }, { "epoch": 11.03, "learning_rate": 9.486948694869487e-05, "loss": 0.2027, "step": 5184 }, { "epoch": 11.03, "eval_loss": 0.7235008478164673, "eval_runtime": 252.9146, "eval_samples_per_second": 8.098, "eval_wer": 0.6036451031444021, "step": 5184 }, { "epoch": 11.17, "learning_rate": 9.342934293429343e-05, "loss": 0.1699, "step": 5248 }, { "epoch": 11.17, "eval_loss": 0.7639468908309937, "eval_runtime": 253.3789, "eval_samples_per_second": 8.083, "eval_wer": 0.6093864743974898, "step": 5248 }, { "epoch": 11.3, "learning_rate": 9.1989198919892e-05, "loss": 0.1556, "step": 5312 }, { "epoch": 11.3, "eval_loss": 0.7758333683013916, "eval_runtime": 255.9522, "eval_samples_per_second": 8.001, "eval_wer": 0.6128580012016823, "step": 5312 }, { "epoch": 11.44, "learning_rate": 9.054905490549054e-05, "loss": 0.1679, "step": 5376 }, { "epoch": 11.44, "eval_loss": 0.7407320737838745, "eval_runtime": 253.1505, "eval_samples_per_second": 8.09, "eval_wer": 0.5980372521530142, "step": 5376 }, { "epoch": 11.57, "learning_rate": 8.910891089108912e-05, "loss": 0.1681, "step": 5440 }, { "epoch": 11.57, "eval_loss": 0.7515969276428223, "eval_runtime": 254.0821, "eval_samples_per_second": 8.06, "eval_wer": 0.6076507109953936, "step": 5440 }, { "epoch": 11.71, "learning_rate": 8.766876687668767e-05, "loss": 0.1814, "step": 5504 }, { "epoch": 11.71, "eval_loss": 0.7393285036087036, "eval_runtime": 255.9386, "eval_samples_per_second": 8.002, "eval_wer": 0.6063822685092463, "step": 5504 }, { "epoch": 11.85, "learning_rate": 8.622862286228623e-05, "loss": 0.162, "step": 5568 }, { "epoch": 11.85, "eval_loss": 0.7688995003700256, "eval_runtime": 255.0601, "eval_samples_per_second": 8.029, "eval_wer": 0.6063155083783964, "step": 5568 }, { "epoch": 11.98, "learning_rate": 8.47884788478848e-05, "loss": 0.1835, "step": 5632 }, { "epoch": 11.98, "eval_loss": 0.7229721546173096, "eval_runtime": 256.1311, "eval_samples_per_second": 7.996, "eval_wer": 0.6020428600040056, "step": 5632 }, { "epoch": 12.12, "learning_rate": 8.334833483348336e-05, "loss": 0.1491, "step": 5696 }, { "epoch": 12.12, "eval_loss": 0.7780319452285767, "eval_runtime": 253.9764, "eval_samples_per_second": 8.064, "eval_wer": 0.6079845116496428, "step": 5696 }, { "epoch": 12.26, "learning_rate": 8.19081908190819e-05, "loss": 0.163, "step": 5760 }, { "epoch": 12.26, "eval_loss": 0.7481300830841064, "eval_runtime": 255.5174, "eval_samples_per_second": 8.015, "eval_wer": 0.6035115828827025, "step": 5760 }, { "epoch": 12.39, "learning_rate": 8.046804680468047e-05, "loss": 0.1564, "step": 5824 }, { "epoch": 12.39, "eval_loss": 0.7477182149887085, "eval_runtime": 253.9361, "eval_samples_per_second": 8.065, "eval_wer": 0.6073169103411443, "step": 5824 }, { "epoch": 12.53, "learning_rate": 7.902790279027903e-05, "loss": 0.1651, "step": 5888 }, { "epoch": 12.53, "eval_loss": 0.7700252532958984, "eval_runtime": 255.811, "eval_samples_per_second": 8.006, "eval_wer": 0.6119901195006342, "step": 5888 }, { "epoch": 12.66, "learning_rate": 7.75877587758776e-05, "loss": 0.1406, "step": 5952 }, { "epoch": 12.66, "eval_loss": 0.7303926348686218, "eval_runtime": 255.3564, "eval_samples_per_second": 8.02, "eval_wer": 0.6010414580412577, "step": 5952 }, { "epoch": 12.8, "learning_rate": 7.614761476147616e-05, "loss": 0.1454, "step": 6016 }, { "epoch": 12.8, "eval_loss": 0.7804365158081055, "eval_runtime": 258.3951, "eval_samples_per_second": 7.926, "eval_wer": 0.6108551972761866, "step": 6016 }, { "epoch": 12.94, "learning_rate": 7.470747074707472e-05, "loss": 0.1405, "step": 6080 }, { "epoch": 12.94, "eval_loss": 0.7660069465637207, "eval_runtime": 258.3136, "eval_samples_per_second": 7.928, "eval_wer": 0.5965017691434675, "step": 6080 }, { "epoch": 13.07, "learning_rate": 7.326732673267327e-05, "loss": 0.142, "step": 6144 }, { "epoch": 13.07, "eval_loss": 0.7802821397781372, "eval_runtime": 255.9638, "eval_samples_per_second": 8.001, "eval_wer": 0.6066493090326457, "step": 6144 }, { "epoch": 13.21, "learning_rate": 7.182718271827183e-05, "loss": 0.1423, "step": 6208 }, { "epoch": 13.21, "eval_loss": 0.7940844297409058, "eval_runtime": 257.753, "eval_samples_per_second": 7.946, "eval_wer": 0.6061152279858468, "step": 6208 }, { "epoch": 13.34, "learning_rate": 7.038703870387039e-05, "loss": 0.1494, "step": 6272 }, { "epoch": 13.34, "eval_loss": 0.7598519325256348, "eval_runtime": 257.9759, "eval_samples_per_second": 7.939, "eval_wer": 0.6009079377795581, "step": 6272 }, { "epoch": 13.48, "learning_rate": 6.894689468946895e-05, "loss": 0.1459, "step": 6336 }, { "epoch": 13.48, "eval_loss": 0.7658703327178955, "eval_runtime": 257.4947, "eval_samples_per_second": 7.954, "eval_wer": 0.6087188730889913, "step": 6336 }, { "epoch": 13.62, "learning_rate": 6.75067506750675e-05, "loss": 0.1316, "step": 6400 }, { "epoch": 13.62, "eval_loss": 0.7935535311698914, "eval_runtime": 256.5247, "eval_samples_per_second": 7.984, "eval_wer": 0.6047132652379998, "step": 6400 }, { "epoch": 13.75, "learning_rate": 6.606660666066608e-05, "loss": 0.1428, "step": 6464 }, { "epoch": 13.75, "eval_loss": 0.782112181186676, "eval_runtime": 259.1672, "eval_samples_per_second": 7.902, "eval_wer": 0.5998397756859604, "step": 6464 }, { "epoch": 13.89, "learning_rate": 6.462646264626463e-05, "loss": 0.1391, "step": 6528 }, { "epoch": 13.89, "eval_loss": 0.7535277605056763, "eval_runtime": 257.3279, "eval_samples_per_second": 7.959, "eval_wer": 0.5976366913679151, "step": 6528 }, { "epoch": 14.03, "learning_rate": 6.318631863186318e-05, "loss": 0.138, "step": 6592 }, { "epoch": 14.03, "eval_loss": 0.7846142649650574, "eval_runtime": 257.0571, "eval_samples_per_second": 7.967, "eval_wer": 0.5956338874424194, "step": 6592 }, { "epoch": 14.16, "learning_rate": 6.174617461746175e-05, "loss": 0.1299, "step": 6656 }, { "epoch": 14.16, "eval_loss": 0.7645628452301025, "eval_runtime": 259.0072, "eval_samples_per_second": 7.907, "eval_wer": 0.5987716135923626, "step": 6656 }, { "epoch": 14.3, "learning_rate": 6.03060306030603e-05, "loss": 0.1287, "step": 6720 }, { "epoch": 14.3, "eval_loss": 0.8099244832992554, "eval_runtime": 258.7408, "eval_samples_per_second": 7.915, "eval_wer": 0.612056879631484, "step": 6720 }, { "epoch": 14.43, "learning_rate": 5.886588658865887e-05, "loss": 0.1288, "step": 6784 }, { "epoch": 14.43, "eval_loss": 0.8042709231376648, "eval_runtime": 258.8574, "eval_samples_per_second": 7.912, "eval_wer": 0.6015755390880566, "step": 6784 }, { "epoch": 14.57, "learning_rate": 5.742574257425742e-05, "loss": 0.131, "step": 6848 }, { "epoch": 14.57, "eval_loss": 0.7604876756668091, "eval_runtime": 256.5152, "eval_samples_per_second": 7.984, "eval_wer": 0.5951665665264704, "step": 6848 }, { "epoch": 14.71, "learning_rate": 5.598559855985599e-05, "loss": 0.1353, "step": 6912 }, { "epoch": 14.71, "eval_loss": 0.784662127494812, "eval_runtime": 256.9735, "eval_samples_per_second": 7.97, "eval_wer": 0.59997329594766, "step": 6912 }, { "epoch": 14.84, "learning_rate": 5.4545454545454546e-05, "loss": 0.133, "step": 6976 }, { "epoch": 14.84, "eval_loss": 0.8255736827850342, "eval_runtime": 256.2102, "eval_samples_per_second": 7.993, "eval_wer": 0.6010414580412577, "step": 6976 }, { "epoch": 14.98, "learning_rate": 5.310531053105311e-05, "loss": 0.1351, "step": 7040 }, { "epoch": 14.98, "eval_loss": 0.7962599992752075, "eval_runtime": 256.4813, "eval_samples_per_second": 7.985, "eval_wer": 0.5983710528072635, "step": 7040 }, { "epoch": 15.11, "learning_rate": 5.1665166516651664e-05, "loss": 0.1156, "step": 7104 }, { "epoch": 15.11, "eval_loss": 0.8220678567886353, "eval_runtime": 255.8959, "eval_samples_per_second": 8.003, "eval_wer": 0.6015087789572068, "step": 7104 }, { "epoch": 15.25, "learning_rate": 5.022502250225023e-05, "loss": 0.1171, "step": 7168 }, { "epoch": 15.25, "eval_loss": 0.7746037840843201, "eval_runtime": 255.6563, "eval_samples_per_second": 8.011, "eval_wer": 0.592562921423326, "step": 7168 }, { "epoch": 15.39, "learning_rate": 4.878487848784879e-05, "loss": 0.1213, "step": 7232 }, { "epoch": 15.39, "eval_loss": 0.8010141253471375, "eval_runtime": 256.8217, "eval_samples_per_second": 7.974, "eval_wer": 0.6013084985646572, "step": 7232 }, { "epoch": 15.52, "learning_rate": 4.734473447344735e-05, "loss": 0.1175, "step": 7296 }, { "epoch": 15.52, "eval_loss": 0.8121392726898193, "eval_runtime": 255.5954, "eval_samples_per_second": 8.013, "eval_wer": 0.6077174711262434, "step": 7296 }, { "epoch": 15.66, "learning_rate": 4.59045904590459e-05, "loss": 0.12, "step": 7360 }, { "epoch": 15.66, "eval_loss": 0.7914840579032898, "eval_runtime": 256.4734, "eval_samples_per_second": 7.985, "eval_wer": 0.5952333266573202, "step": 7360 }, { "epoch": 15.8, "learning_rate": 4.4464446444644464e-05, "loss": 0.1147, "step": 7424 }, { "epoch": 15.8, "eval_loss": 0.7968710660934448, "eval_runtime": 255.805, "eval_samples_per_second": 8.006, "eval_wer": 0.5996394952934108, "step": 7424 }, { "epoch": 15.93, "learning_rate": 4.3024302430243026e-05, "loss": 0.1217, "step": 7488 }, { "epoch": 15.93, "eval_loss": 0.8240338563919067, "eval_runtime": 254.7962, "eval_samples_per_second": 8.038, "eval_wer": 0.6034448227518526, "step": 7488 }, { "epoch": 16.07, "learning_rate": 4.158415841584158e-05, "loss": 0.1088, "step": 7552 }, { "epoch": 16.07, "eval_loss": 0.8148845434188843, "eval_runtime": 255.3742, "eval_samples_per_second": 8.02, "eval_wer": 0.6006408972561587, "step": 7552 }, { "epoch": 16.2, "learning_rate": 4.0144014401440144e-05, "loss": 0.1086, "step": 7616 }, { "epoch": 16.2, "eval_loss": 0.8434123992919922, "eval_runtime": 256.199, "eval_samples_per_second": 7.994, "eval_wer": 0.6020428600040056, "step": 7616 }, { "epoch": 16.34, "learning_rate": 3.870387038703871e-05, "loss": 0.122, "step": 7680 }, { "epoch": 16.34, "eval_loss": 0.8277792930603027, "eval_runtime": 255.246, "eval_samples_per_second": 8.024, "eval_wer": 0.5944989652179719, "step": 7680 }, { "epoch": 16.48, "learning_rate": 3.726372637263726e-05, "loss": 0.109, "step": 7744 }, { "epoch": 16.48, "eval_loss": 0.8119443655014038, "eval_runtime": 255.9109, "eval_samples_per_second": 8.003, "eval_wer": 0.5994392149008612, "step": 7744 }, { "epoch": 16.61, "learning_rate": 3.5823582358235825e-05, "loss": 0.1159, "step": 7808 }, { "epoch": 16.61, "eval_loss": 0.8458046913146973, "eval_runtime": 255.3949, "eval_samples_per_second": 8.019, "eval_wer": 0.6047800253688497, "step": 7808 }, { "epoch": 16.75, "learning_rate": 3.438343834383439e-05, "loss": 0.0992, "step": 7872 }, { "epoch": 16.75, "eval_loss": 0.8495743870735168, "eval_runtime": 255.6285, "eval_samples_per_second": 8.012, "eval_wer": 0.6048467854996996, "step": 7872 }, { "epoch": 16.89, "learning_rate": 3.294329432943294e-05, "loss": 0.1078, "step": 7936 }, { "epoch": 16.89, "eval_loss": 0.8393277525901794, "eval_runtime": 255.6117, "eval_samples_per_second": 8.012, "eval_wer": 0.5990386541157621, "step": 7936 }, { "epoch": 17.02, "learning_rate": 3.1503150315031506e-05, "loss": 0.1096, "step": 8000 }, { "epoch": 17.02, "eval_loss": 0.8454439640045166, "eval_runtime": 255.5979, "eval_samples_per_second": 8.013, "eval_wer": 0.5995059750317111, "step": 8000 }, { "epoch": 17.16, "learning_rate": 3.0063006300630065e-05, "loss": 0.0986, "step": 8064 }, { "epoch": 17.16, "eval_loss": 0.8286423683166504, "eval_runtime": 257.011, "eval_samples_per_second": 7.969, "eval_wer": 0.6011082181721076, "step": 8064 }, { "epoch": 17.29, "learning_rate": 2.8622862286228624e-05, "loss": 0.1154, "step": 8128 }, { "epoch": 17.29, "eval_loss": 0.8368263244628906, "eval_runtime": 258.2275, "eval_samples_per_second": 7.931, "eval_wer": 0.5988383737232125, "step": 8128 }, { "epoch": 17.43, "learning_rate": 2.7182718271827183e-05, "loss": 0.0927, "step": 8192 }, { "epoch": 17.43, "eval_loss": 0.861258864402771, "eval_runtime": 257.1554, "eval_samples_per_second": 7.964, "eval_wer": 0.6089191534815408, "step": 8192 }, { "epoch": 17.57, "learning_rate": 2.5742574257425746e-05, "loss": 0.1056, "step": 8256 }, { "epoch": 17.57, "eval_loss": 0.8197841644287109, "eval_runtime": 256.4042, "eval_samples_per_second": 7.987, "eval_wer": 0.592562921423326, "step": 8256 }, { "epoch": 17.7, "learning_rate": 2.4302430243024305e-05, "loss": 0.1112, "step": 8320 }, { "epoch": 17.7, "eval_loss": 0.8646882772445679, "eval_runtime": 257.1754, "eval_samples_per_second": 7.963, "eval_wer": 0.5978369717604647, "step": 8320 }, { "epoch": 17.84, "learning_rate": 2.2862286228622864e-05, "loss": 0.0962, "step": 8384 }, { "epoch": 17.84, "eval_loss": 0.8387396335601807, "eval_runtime": 255.7406, "eval_samples_per_second": 8.008, "eval_wer": 0.5963014887509179, "step": 8384 }, { "epoch": 17.97, "learning_rate": 2.1422142214221423e-05, "loss": 0.0973, "step": 8448 }, { "epoch": 17.97, "eval_loss": 0.8584703207015991, "eval_runtime": 256.391, "eval_samples_per_second": 7.988, "eval_wer": 0.5963682488817678, "step": 8448 }, { "epoch": 18.11, "learning_rate": 1.9981998199819982e-05, "loss": 0.1013, "step": 8512 }, { "epoch": 18.11, "eval_loss": 0.8741297721862793, "eval_runtime": 257.1245, "eval_samples_per_second": 7.965, "eval_wer": 0.6001735763402096, "step": 8512 }, { "epoch": 18.25, "learning_rate": 1.854185418541854e-05, "loss": 0.1037, "step": 8576 }, { "epoch": 18.25, "eval_loss": 0.8630872368812561, "eval_runtime": 257.6611, "eval_samples_per_second": 7.948, "eval_wer": 0.5943654449562721, "step": 8576 }, { "epoch": 18.38, "learning_rate": 1.71017101710171e-05, "loss": 0.0947, "step": 8640 }, { "epoch": 18.38, "eval_loss": 0.86741703748703, "eval_runtime": 257.8968, "eval_samples_per_second": 7.941, "eval_wer": 0.596768809666867, "step": 8640 }, { "epoch": 18.52, "learning_rate": 1.5661566156615663e-05, "loss": 0.0884, "step": 8704 }, { "epoch": 18.52, "eval_loss": 0.8764230608940125, "eval_runtime": 256.705, "eval_samples_per_second": 7.978, "eval_wer": 0.5943654449562721, "step": 8704 }, { "epoch": 18.66, "learning_rate": 1.4221422142214222e-05, "loss": 0.0997, "step": 8768 }, { "epoch": 18.66, "eval_loss": 0.8616538047790527, "eval_runtime": 256.2987, "eval_samples_per_second": 7.991, "eval_wer": 0.593030242339275, "step": 8768 }, { "epoch": 18.79, "learning_rate": 1.2781278127812781e-05, "loss": 0.1066, "step": 8832 }, { "epoch": 18.79, "eval_loss": 0.8461120128631592, "eval_runtime": 255.9062, "eval_samples_per_second": 8.003, "eval_wer": 0.5889578743574337, "step": 8832 }, { "epoch": 18.93, "learning_rate": 1.1341134113411342e-05, "loss": 0.0952, "step": 8896 }, { "epoch": 18.93, "eval_loss": 0.8545425534248352, "eval_runtime": 254.9463, "eval_samples_per_second": 8.033, "eval_wer": 0.5970358501902664, "step": 8896 }, { "epoch": 19.06, "learning_rate": 9.900990099009901e-06, "loss": 0.1094, "step": 8960 }, { "epoch": 19.06, "eval_loss": 0.8487824201583862, "eval_runtime": 255.9015, "eval_samples_per_second": 8.003, "eval_wer": 0.5928967220775753, "step": 8960 }, { "epoch": 19.2, "learning_rate": 8.46084608460846e-06, "loss": 0.1023, "step": 9024 }, { "epoch": 19.2, "eval_loss": 0.8481884002685547, "eval_runtime": 255.378, "eval_samples_per_second": 8.019, "eval_wer": 0.5924961612924762, "step": 9024 }, { "epoch": 19.34, "learning_rate": 7.02070207020702e-06, "loss": 0.0896, "step": 9088 }, { "epoch": 19.34, "eval_loss": 0.8508026003837585, "eval_runtime": 258.1213, "eval_samples_per_second": 7.934, "eval_wer": 0.5942319246945724, "step": 9088 }, { "epoch": 19.47, "learning_rate": 5.58055805580558e-06, "loss": 0.1055, "step": 9152 }, { "epoch": 19.47, "eval_loss": 0.8545140027999878, "eval_runtime": 255.3836, "eval_samples_per_second": 8.019, "eval_wer": 0.5957674077041191, "step": 9152 }, { "epoch": 19.61, "learning_rate": 4.14041404140414e-06, "loss": 0.1102, "step": 9216 }, { "epoch": 19.61, "eval_loss": 0.8519406318664551, "eval_runtime": 255.7976, "eval_samples_per_second": 8.006, "eval_wer": 0.5952333266573202, "step": 9216 }, { "epoch": 19.74, "learning_rate": 2.7002700270027004e-06, "loss": 0.1021, "step": 9280 }, { "epoch": 19.74, "eval_loss": 0.8542845249176025, "eval_runtime": 255.9288, "eval_samples_per_second": 8.002, "eval_wer": 0.5940984044328727, "step": 9280 }, { "epoch": 19.88, "learning_rate": 1.2601260126012601e-06, "loss": 0.0934, "step": 9344 }, { "epoch": 19.88, "eval_loss": 0.8573585748672485, "eval_runtime": 256.1564, "eval_samples_per_second": 7.995, "eval_wer": 0.5932305227318245, "step": 9344 }, { "epoch": 20.0, "step": 9400, "total_flos": 5.3055511592602894e+19, "train_runtime": 80232.1889, "train_samples_per_second": 0.117 } ], "max_steps": 9400, "num_train_epochs": 20, "total_flos": 5.3055511592602894e+19, "trial_name": null, "trial_params": null }