{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 9740, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.41, "learning_rate": 0.0001176, "loss": 6.6767, "step": 200 }, { "epoch": 0.41, "eval_loss": 2.9083569049835205, "eval_runtime": 152.4684, "eval_samples_per_second": 8.513, "eval_steps_per_second": 1.069, "eval_wer": 0.9999112452294311, "step": 200 }, { "epoch": 0.82, "learning_rate": 0.0002376, "loss": 2.882, "step": 400 }, { "epoch": 0.82, "eval_loss": 2.8539981842041016, "eval_runtime": 152.5373, "eval_samples_per_second": 8.509, "eval_steps_per_second": 1.069, "eval_wer": 0.9999112452294311, "step": 400 }, { "epoch": 1.23, "learning_rate": 0.00029688311688311686, "loss": 1.8878, "step": 600 }, { "epoch": 1.23, "eval_loss": 0.4857412874698639, "eval_runtime": 153.2041, "eval_samples_per_second": 8.472, "eval_steps_per_second": 1.064, "eval_wer": 0.6288275494807846, "step": 600 }, { "epoch": 1.64, "learning_rate": 0.00029038961038961033, "loss": 0.8319, "step": 800 }, { "epoch": 1.64, "eval_loss": 0.3269199728965759, "eval_runtime": 153.5742, "eval_samples_per_second": 8.452, "eval_steps_per_second": 1.061, "eval_wer": 0.49800301766219934, "step": 800 }, { "epoch": 2.05, "learning_rate": 0.00028389610389610386, "loss": 0.6981, "step": 1000 }, { "epoch": 2.05, "eval_loss": 0.24910558760166168, "eval_runtime": 153.4605, "eval_samples_per_second": 8.458, "eval_steps_per_second": 1.062, "eval_wer": 0.42904056093015, "step": 1000 }, { "epoch": 2.46, "learning_rate": 0.0002774025974025974, "loss": 0.602, "step": 1200 }, { "epoch": 2.46, "eval_loss": 0.2122802734375, "eval_runtime": 153.4584, "eval_samples_per_second": 8.458, "eval_steps_per_second": 1.062, "eval_wer": 0.38164551344634773, "step": 1200 }, { "epoch": 2.87, "learning_rate": 0.0002709090909090909, "loss": 0.5643, "step": 1400 }, { "epoch": 2.87, "eval_loss": 0.19468843936920166, "eval_runtime": 153.4192, "eval_samples_per_second": 8.46, "eval_steps_per_second": 1.062, "eval_wer": 0.36185319960947904, "step": 1400 }, { "epoch": 3.29, "learning_rate": 0.0002644155844155844, "loss": 0.5261, "step": 1600 }, { "epoch": 3.29, "eval_loss": 0.1933012455701828, "eval_runtime": 153.5939, "eval_samples_per_second": 8.451, "eval_steps_per_second": 1.061, "eval_wer": 0.3587467826395669, "step": 1600 }, { "epoch": 3.7, "learning_rate": 0.00025792207792207786, "loss": 0.5094, "step": 1800 }, { "epoch": 3.7, "eval_loss": 0.17698752880096436, "eval_runtime": 154.3088, "eval_samples_per_second": 8.412, "eval_steps_per_second": 1.056, "eval_wer": 0.33362918256856305, "step": 1800 }, { "epoch": 4.11, "learning_rate": 0.0002514285714285714, "loss": 0.4871, "step": 2000 }, { "epoch": 4.11, "eval_loss": 0.17310026288032532, "eval_runtime": 154.2506, "eval_samples_per_second": 8.415, "eval_steps_per_second": 1.057, "eval_wer": 0.32067098606550104, "step": 2000 }, { "epoch": 4.52, "learning_rate": 0.0002449350649350649, "loss": 0.4537, "step": 2200 }, { "epoch": 4.52, "eval_loss": 0.15884645283222198, "eval_runtime": 154.4502, "eval_samples_per_second": 8.404, "eval_steps_per_second": 1.055, "eval_wer": 0.2984822934232715, "step": 2200 }, { "epoch": 4.93, "learning_rate": 0.00023844155844155842, "loss": 0.4365, "step": 2400 }, { "epoch": 4.93, "eval_loss": 0.14789989590644836, "eval_runtime": 154.3892, "eval_samples_per_second": 8.407, "eval_steps_per_second": 1.056, "eval_wer": 0.29289074287742967, "step": 2400 }, { "epoch": 5.34, "learning_rate": 0.00023194805194805192, "loss": 0.4132, "step": 2600 }, { "epoch": 5.34, "eval_loss": 0.15590806305408478, "eval_runtime": 154.486, "eval_samples_per_second": 8.402, "eval_steps_per_second": 1.055, "eval_wer": 0.29093813792491346, "step": 2600 }, { "epoch": 5.75, "learning_rate": 0.00022545454545454542, "loss": 0.4043, "step": 2800 }, { "epoch": 5.75, "eval_loss": 0.14599837362766266, "eval_runtime": 154.6579, "eval_samples_per_second": 8.393, "eval_steps_per_second": 1.054, "eval_wer": 0.2759385816987663, "step": 2800 }, { "epoch": 6.16, "learning_rate": 0.00021896103896103895, "loss": 0.3933, "step": 3000 }, { "epoch": 6.16, "eval_loss": 0.1374884694814682, "eval_runtime": 154.356, "eval_samples_per_second": 8.409, "eval_steps_per_second": 1.056, "eval_wer": 0.26981450252951095, "step": 3000 }, { "epoch": 6.57, "learning_rate": 0.00021246753246753245, "loss": 0.3784, "step": 3200 }, { "epoch": 6.57, "eval_loss": 0.13286827504634857, "eval_runtime": 154.759, "eval_samples_per_second": 8.387, "eval_steps_per_second": 1.053, "eval_wer": 0.2592526848318097, "step": 3200 }, { "epoch": 6.98, "learning_rate": 0.00020597402597402598, "loss": 0.3749, "step": 3400 }, { "epoch": 6.98, "eval_loss": 0.13346754014492035, "eval_runtime": 154.6584, "eval_samples_per_second": 8.393, "eval_steps_per_second": 1.054, "eval_wer": 0.2542824176799503, "step": 3400 }, { "epoch": 7.39, "learning_rate": 0.00019948051948051945, "loss": 0.3507, "step": 3600 }, { "epoch": 7.39, "eval_loss": 0.1268981546163559, "eval_runtime": 154.9793, "eval_samples_per_second": 8.375, "eval_steps_per_second": 1.052, "eval_wer": 0.2498446791515044, "step": 3600 }, { "epoch": 7.8, "learning_rate": 0.00019298701298701295, "loss": 0.3523, "step": 3800 }, { "epoch": 7.8, "eval_loss": 0.12715502083301544, "eval_runtime": 155.4621, "eval_samples_per_second": 8.349, "eval_steps_per_second": 1.048, "eval_wer": 0.25064347208662463, "step": 3800 }, { "epoch": 8.21, "learning_rate": 0.00018649350649350648, "loss": 0.3389, "step": 4000 }, { "epoch": 8.21, "eval_loss": 0.1217104122042656, "eval_runtime": 154.736, "eval_samples_per_second": 8.388, "eval_steps_per_second": 1.053, "eval_wer": 0.23866157805982072, "step": 4000 }, { "epoch": 8.62, "learning_rate": 0.00017999999999999998, "loss": 0.3223, "step": 4200 }, { "epoch": 8.62, "eval_loss": 0.12116999924182892, "eval_runtime": 154.7876, "eval_samples_per_second": 8.386, "eval_steps_per_second": 1.053, "eval_wer": 0.23298127274340996, "step": 4200 }, { "epoch": 9.03, "learning_rate": 0.0001735064935064935, "loss": 0.3232, "step": 4400 }, { "epoch": 9.03, "eval_loss": 0.122792087495327, "eval_runtime": 155.7586, "eval_samples_per_second": 8.333, "eval_steps_per_second": 1.046, "eval_wer": 0.2297861010029289, "step": 4400 }, { "epoch": 9.45, "learning_rate": 0.00016701298701298699, "loss": 0.3107, "step": 4600 }, { "epoch": 9.45, "eval_loss": 0.11764618009328842, "eval_runtime": 154.9959, "eval_samples_per_second": 8.374, "eval_steps_per_second": 1.052, "eval_wer": 0.22747847696813703, "step": 4600 }, { "epoch": 9.86, "learning_rate": 0.0001605194805194805, "loss": 0.3096, "step": 4800 }, { "epoch": 9.86, "eval_loss": 0.11639699339866638, "eval_runtime": 155.3496, "eval_samples_per_second": 8.355, "eval_steps_per_second": 1.049, "eval_wer": 0.22747847696813703, "step": 4800 }, { "epoch": 10.27, "learning_rate": 0.00015402597402597402, "loss": 0.2997, "step": 5000 }, { "epoch": 10.27, "eval_loss": 0.11194771528244019, "eval_runtime": 155.1231, "eval_samples_per_second": 8.368, "eval_steps_per_second": 1.051, "eval_wer": 0.21736043312328038, "step": 5000 }, { "epoch": 10.68, "learning_rate": 0.00014753246753246752, "loss": 0.2988, "step": 5200 }, { "epoch": 10.68, "eval_loss": 0.10963477194309235, "eval_runtime": 155.3535, "eval_samples_per_second": 8.355, "eval_steps_per_second": 1.049, "eval_wer": 0.2178929617466939, "step": 5200 }, { "epoch": 11.09, "learning_rate": 0.00014103896103896102, "loss": 0.2846, "step": 5400 }, { "epoch": 11.09, "eval_loss": 0.11308039724826813, "eval_runtime": 155.4711, "eval_samples_per_second": 8.349, "eval_steps_per_second": 1.048, "eval_wer": 0.21372148752995473, "step": 5400 }, { "epoch": 11.5, "learning_rate": 0.00013454545454545455, "loss": 0.2752, "step": 5600 }, { "epoch": 11.5, "eval_loss": 0.11051586270332336, "eval_runtime": 155.7324, "eval_samples_per_second": 8.335, "eval_steps_per_second": 1.047, "eval_wer": 0.21168012780686962, "step": 5600 }, { "epoch": 11.91, "learning_rate": 0.00012805194805194805, "loss": 0.272, "step": 5800 }, { "epoch": 11.91, "eval_loss": 0.10839453339576721, "eval_runtime": 156.1054, "eval_samples_per_second": 8.315, "eval_steps_per_second": 1.044, "eval_wer": 0.21034880624833585, "step": 5800 }, { "epoch": 12.32, "learning_rate": 0.00012155844155844156, "loss": 0.2639, "step": 6000 }, { "epoch": 12.32, "eval_loss": 0.10824086517095566, "eval_runtime": 155.9597, "eval_samples_per_second": 8.323, "eval_steps_per_second": 1.045, "eval_wer": 0.21176888257743853, "step": 6000 }, { "epoch": 12.73, "learning_rate": 0.00011506493506493505, "loss": 0.265, "step": 6200 }, { "epoch": 12.73, "eval_loss": 0.11145278066396713, "eval_runtime": 156.0773, "eval_samples_per_second": 8.316, "eval_steps_per_second": 1.044, "eval_wer": 0.2056448034081832, "step": 6200 }, { "epoch": 13.14, "learning_rate": 0.00010857142857142856, "loss": 0.2627, "step": 6400 }, { "epoch": 13.14, "eval_loss": 0.10837554931640625, "eval_runtime": 155.763, "eval_samples_per_second": 8.333, "eval_steps_per_second": 1.046, "eval_wer": 0.20608857726102778, "step": 6400 }, { "epoch": 13.55, "learning_rate": 0.00010207792207792206, "loss": 0.25, "step": 6600 }, { "epoch": 13.55, "eval_loss": 0.11012164503335953, "eval_runtime": 155.919, "eval_samples_per_second": 8.325, "eval_steps_per_second": 1.045, "eval_wer": 0.2057335581787521, "step": 6600 }, { "epoch": 13.96, "learning_rate": 9.558441558441558e-05, "loss": 0.2478, "step": 6800 }, { "epoch": 13.96, "eval_loss": 0.10358110070228577, "eval_runtime": 155.6599, "eval_samples_per_second": 8.339, "eval_steps_per_second": 1.047, "eval_wer": 0.20253838643827105, "step": 6800 }, { "epoch": 14.37, "learning_rate": 8.91233766233766e-05, "loss": 0.2438, "step": 7000 }, { "epoch": 14.37, "eval_loss": 0.1090790405869484, "eval_runtime": 155.41, "eval_samples_per_second": 8.352, "eval_steps_per_second": 1.049, "eval_wer": 0.20058578148575487, "step": 7000 }, { "epoch": 14.78, "learning_rate": 8.262987012987012e-05, "loss": 0.2339, "step": 7200 }, { "epoch": 14.78, "eval_loss": 0.10931634157896042, "eval_runtime": 155.5722, "eval_samples_per_second": 8.343, "eval_steps_per_second": 1.048, "eval_wer": 0.20005325286234135, "step": 7200 }, { "epoch": 15.2, "learning_rate": 7.613636363636363e-05, "loss": 0.2298, "step": 7400 }, { "epoch": 15.2, "eval_loss": 0.11316312849521637, "eval_runtime": 177.5447, "eval_samples_per_second": 7.311, "eval_steps_per_second": 0.918, "eval_wer": 0.20067453625632378, "step": 7400 }, { "epoch": 15.61, "learning_rate": 6.964285714285713e-05, "loss": 0.2239, "step": 7600 }, { "epoch": 15.61, "eval_loss": 0.10355914384126663, "eval_runtime": 160.3854, "eval_samples_per_second": 8.093, "eval_steps_per_second": 1.016, "eval_wer": 0.1958817786456022, "step": 7600 }, { "epoch": 16.02, "learning_rate": 6.314935064935063e-05, "loss": 0.2227, "step": 7800 }, { "epoch": 16.02, "eval_loss": 0.10628173500299454, "eval_runtime": 156.445, "eval_samples_per_second": 8.297, "eval_steps_per_second": 1.042, "eval_wer": 0.19810064790982515, "step": 7800 }, { "epoch": 16.43, "learning_rate": 5.665584415584415e-05, "loss": 0.2184, "step": 8000 }, { "epoch": 16.43, "eval_loss": 0.1046755313873291, "eval_runtime": 155.9684, "eval_samples_per_second": 8.322, "eval_steps_per_second": 1.045, "eval_wer": 0.19774562882754948, "step": 8000 }, { "epoch": 16.84, "learning_rate": 5.016233766233765e-05, "loss": 0.2217, "step": 8200 }, { "epoch": 16.84, "eval_loss": 0.10504923015832901, "eval_runtime": 155.7392, "eval_samples_per_second": 8.334, "eval_steps_per_second": 1.047, "eval_wer": 0.1934853998402414, "step": 8200 }, { "epoch": 17.25, "learning_rate": 4.3668831168831165e-05, "loss": 0.2083, "step": 8400 }, { "epoch": 17.25, "eval_loss": 0.10518524795770645, "eval_runtime": 155.7435, "eval_samples_per_second": 8.334, "eval_steps_per_second": 1.047, "eval_wer": 0.19259785213455224, "step": 8400 }, { "epoch": 17.66, "learning_rate": 3.717532467532467e-05, "loss": 0.2092, "step": 8600 }, { "epoch": 17.66, "eval_loss": 0.10656626522541046, "eval_runtime": 155.7623, "eval_samples_per_second": 8.333, "eval_steps_per_second": 1.046, "eval_wer": 0.19392917369308602, "step": 8600 }, { "epoch": 18.07, "learning_rate": 3.068181818181818e-05, "loss": 0.2137, "step": 8800 }, { "epoch": 18.07, "eval_loss": 0.1052984818816185, "eval_runtime": 157.7405, "eval_samples_per_second": 8.229, "eval_steps_per_second": 1.033, "eval_wer": 0.19375166415194817, "step": 8800 }, { "epoch": 18.48, "learning_rate": 2.418831168831169e-05, "loss": 0.2031, "step": 9000 }, { "epoch": 18.48, "eval_loss": 0.10525061190128326, "eval_runtime": 155.9945, "eval_samples_per_second": 8.321, "eval_steps_per_second": 1.045, "eval_wer": 0.19082275672317386, "step": 9000 }, { "epoch": 18.89, "learning_rate": 1.7694805194805193e-05, "loss": 0.2066, "step": 9200 }, { "epoch": 18.89, "eval_loss": 0.10611088573932648, "eval_runtime": 156.0935, "eval_samples_per_second": 8.316, "eval_steps_per_second": 1.044, "eval_wer": 0.19126653057601847, "step": 9200 }, { "epoch": 19.3, "learning_rate": 1.12012987012987e-05, "loss": 0.2011, "step": 9400 }, { "epoch": 19.3, "eval_loss": 0.1054379940032959, "eval_runtime": 155.9491, "eval_samples_per_second": 8.323, "eval_steps_per_second": 1.045, "eval_wer": 0.19171030442886305, "step": 9400 }, { "epoch": 19.71, "learning_rate": 4.707792207792207e-06, "loss": 0.2014, "step": 9600 }, { "epoch": 19.71, "eval_loss": 0.10496143996715546, "eval_runtime": 156.6534, "eval_samples_per_second": 8.286, "eval_steps_per_second": 1.041, "eval_wer": 0.1914440401171563, "step": 9600 }, { "epoch": 20.0, "step": 9740, "total_flos": 4.083899913785416e+19, "train_loss": 0.5487961105252683, "train_runtime": 44938.105, "train_samples_per_second": 5.199, "train_steps_per_second": 0.217 } ], "max_steps": 9740, "num_train_epochs": 20, "total_flos": 4.083899913785416e+19, "trial_name": null, "trial_params": null }