{ "best_metric": null, "best_model_checkpoint": null, "epoch": 158.80044407438245, "global_step": 286000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.11, "learning_rate": 9.968592592592593e-05, "loss": 1.9211, "step": 2000 }, { "epoch": 1.11, "eval_loss": 0.3011658489704132, "eval_runtime": 109.7461, "eval_samples_per_second": 116.214, "eval_steps_per_second": 3.636, "eval_wer": 0.4120684023643701, "step": 2000 }, { "epoch": 2.22, "learning_rate": 9.931555555555556e-05, "loss": 0.9582, "step": 4000 }, { "epoch": 2.22, "eval_loss": 0.24203644692897797, "eval_runtime": 109.3338, "eval_samples_per_second": 116.652, "eval_steps_per_second": 3.649, "eval_wer": 0.3336880590881792, "step": 4000 }, { "epoch": 3.33, "learning_rate": 9.894518518518519e-05, "loss": 0.8899, "step": 6000 }, { "epoch": 3.33, "eval_loss": 0.21842752397060394, "eval_runtime": 109.0188, "eval_samples_per_second": 116.989, "eval_steps_per_second": 3.66, "eval_wer": 0.3000136973311277, "step": 6000 }, { "epoch": 4.44, "learning_rate": 9.857500000000001e-05, "loss": 0.8488, "step": 8000 }, { "epoch": 4.44, "eval_loss": 0.20210157334804535, "eval_runtime": 108.438, "eval_samples_per_second": 117.616, "eval_steps_per_second": 3.68, "eval_wer": 0.28011042156170646, "step": 8000 }, { "epoch": 5.55, "learning_rate": 9.820481481481482e-05, "loss": 0.8243, "step": 10000 }, { "epoch": 5.55, "eval_loss": 0.19518321752548218, "eval_runtime": 109.2218, "eval_samples_per_second": 116.772, "eval_steps_per_second": 3.653, "eval_wer": 0.2646956558387508, "step": 10000 }, { "epoch": 6.66, "learning_rate": 9.783462962962964e-05, "loss": 0.8051, "step": 12000 }, { "epoch": 6.66, "eval_loss": 0.18829651176929474, "eval_runtime": 110.5038, "eval_samples_per_second": 115.417, "eval_steps_per_second": 3.611, "eval_wer": 0.25592936391701526, "step": 12000 }, { "epoch": 7.77, "learning_rate": 9.746444444444445e-05, "loss": 0.7875, "step": 14000 }, { "epoch": 7.77, "eval_loss": 0.18309463560581207, "eval_runtime": 109.5923, "eval_samples_per_second": 116.377, "eval_steps_per_second": 3.641, "eval_wer": 0.24850119588237154, "step": 14000 }, { "epoch": 8.88, "learning_rate": 9.709407407407408e-05, "loss": 0.772, "step": 16000 }, { "epoch": 8.88, "eval_loss": 0.17918168008327484, "eval_runtime": 109.8993, "eval_samples_per_second": 116.052, "eval_steps_per_second": 3.631, "eval_wer": 0.24318030955968348, "step": 16000 }, { "epoch": 9.99, "learning_rate": 9.672388888888889e-05, "loss": 0.7597, "step": 18000 }, { "epoch": 9.99, "eval_loss": 0.17556481063365936, "eval_runtime": 109.4542, "eval_samples_per_second": 116.524, "eval_steps_per_second": 3.645, "eval_wer": 0.23213815338903582, "step": 18000 }, { "epoch": 11.1, "learning_rate": 9.635351851851853e-05, "loss": 0.7462, "step": 20000 }, { "epoch": 11.1, "eval_loss": 0.17337526381015778, "eval_runtime": 107.9125, "eval_samples_per_second": 118.188, "eval_steps_per_second": 3.697, "eval_wer": 0.22965156096892814, "step": 20000 }, { "epoch": 12.22, "learning_rate": 9.598333333333334e-05, "loss": 0.7375, "step": 22000 }, { "epoch": 12.22, "eval_loss": 0.16925489902496338, "eval_runtime": 109.8662, "eval_samples_per_second": 116.087, "eval_steps_per_second": 3.632, "eval_wer": 0.22406726443224562, "step": 22000 }, { "epoch": 13.33, "learning_rate": 9.561314814814816e-05, "loss": 0.7266, "step": 24000 }, { "epoch": 13.33, "eval_loss": 0.16615571081638336, "eval_runtime": 108.8823, "eval_samples_per_second": 117.136, "eval_steps_per_second": 3.665, "eval_wer": 0.22065346805887745, "step": 24000 }, { "epoch": 14.44, "learning_rate": 9.524277777777778e-05, "loss": 0.7181, "step": 26000 }, { "epoch": 14.44, "eval_loss": 0.16478531062602997, "eval_runtime": 108.8044, "eval_samples_per_second": 117.22, "eval_steps_per_second": 3.667, "eval_wer": 0.21787185619909596, "step": 26000 }, { "epoch": 15.55, "learning_rate": 9.487277777777778e-05, "loss": 0.7143, "step": 28000 }, { "epoch": 15.55, "eval_loss": 0.16348408162593842, "eval_runtime": 109.4918, "eval_samples_per_second": 116.484, "eval_steps_per_second": 3.644, "eval_wer": 0.21597530265833587, "step": 28000 }, { "epoch": 16.66, "learning_rate": 9.45025925925926e-05, "loss": 0.7032, "step": 30000 }, { "epoch": 16.66, "eval_loss": 0.16043423116207123, "eval_runtime": 109.1929, "eval_samples_per_second": 116.802, "eval_steps_per_second": 3.654, "eval_wer": 0.21385748453782044, "step": 30000 }, { "epoch": 17.77, "learning_rate": 9.413222222222222e-05, "loss": 0.6988, "step": 32000 }, { "epoch": 17.77, "eval_loss": 0.16011235117912292, "eval_runtime": 109.5116, "eval_samples_per_second": 116.463, "eval_steps_per_second": 3.643, "eval_wer": 0.21078085323836518, "step": 32000 }, { "epoch": 18.88, "learning_rate": 9.376203703703704e-05, "loss": 0.6896, "step": 34000 }, { "epoch": 18.88, "eval_loss": 0.15551629662513733, "eval_runtime": 109.5231, "eval_samples_per_second": 116.45, "eval_steps_per_second": 3.643, "eval_wer": 0.20503851057328598, "step": 34000 }, { "epoch": 19.99, "learning_rate": 9.339166666666667e-05, "loss": 0.6837, "step": 36000 }, { "epoch": 19.99, "eval_loss": 0.15630744397640228, "eval_runtime": 111.1508, "eval_samples_per_second": 114.745, "eval_steps_per_second": 3.59, "eval_wer": 0.20782012243306747, "step": 36000 }, { "epoch": 21.1, "learning_rate": 9.302148148148149e-05, "loss": 0.6779, "step": 38000 }, { "epoch": 21.1, "eval_loss": 0.15380945801734924, "eval_runtime": 108.7667, "eval_samples_per_second": 117.26, "eval_steps_per_second": 3.668, "eval_wer": 0.2020250977251894, "step": 38000 }, { "epoch": 22.21, "learning_rate": 9.26512962962963e-05, "loss": 0.6749, "step": 40000 }, { "epoch": 22.21, "eval_loss": 0.15313279628753662, "eval_runtime": 109.8109, "eval_samples_per_second": 116.145, "eval_steps_per_second": 3.634, "eval_wer": 0.20110843018048868, "step": 40000 }, { "epoch": 23.32, "learning_rate": 9.228111111111112e-05, "loss": 0.6704, "step": 42000 }, { "epoch": 23.32, "eval_loss": 0.15281446278095245, "eval_runtime": 109.3199, "eval_samples_per_second": 116.667, "eval_steps_per_second": 3.65, "eval_wer": 0.2010978937719289, "step": 42000 }, { "epoch": 24.43, "learning_rate": 9.191074074074074e-05, "loss": 0.6669, "step": 44000 }, { "epoch": 24.43, "eval_loss": 0.15204770863056183, "eval_runtime": 109.4873, "eval_samples_per_second": 116.488, "eval_steps_per_second": 3.644, "eval_wer": 0.19591398076051797, "step": 44000 }, { "epoch": 25.54, "learning_rate": 9.154037037037038e-05, "loss": 0.6594, "step": 46000 }, { "epoch": 25.54, "eval_loss": 0.14905157685279846, "eval_runtime": 108.5995, "eval_samples_per_second": 117.441, "eval_steps_per_second": 3.674, "eval_wer": 0.19542930596676816, "step": 46000 }, { "epoch": 26.65, "learning_rate": 9.117018518518519e-05, "loss": 0.6542, "step": 48000 }, { "epoch": 26.65, "eval_loss": 0.14924176037311554, "eval_runtime": 110.1651, "eval_samples_per_second": 115.772, "eval_steps_per_second": 3.622, "eval_wer": 0.19538716033252906, "step": 48000 }, { "epoch": 27.76, "learning_rate": 9.079981481481482e-05, "loss": 0.652, "step": 50000 }, { "epoch": 27.76, "eval_loss": 0.14801371097564697, "eval_runtime": 109.7651, "eval_samples_per_second": 116.194, "eval_steps_per_second": 3.635, "eval_wer": 0.19368026214584497, "step": 50000 }, { "epoch": 28.87, "learning_rate": 9.042962962962963e-05, "loss": 0.6474, "step": 52000 }, { "epoch": 28.87, "eval_loss": 0.14799948036670685, "eval_runtime": 110.6825, "eval_samples_per_second": 115.231, "eval_steps_per_second": 3.605, "eval_wer": 0.192394820301552, "step": 52000 }, { "epoch": 29.98, "learning_rate": 9.005944444444445e-05, "loss": 0.6443, "step": 54000 }, { "epoch": 29.98, "eval_loss": 0.14905065298080444, "eval_runtime": 110.5602, "eval_samples_per_second": 115.358, "eval_steps_per_second": 3.609, "eval_wer": 0.19057202162071035, "step": 54000 }, { "epoch": 31.09, "learning_rate": 8.968907407407408e-05, "loss": 0.6407, "step": 56000 }, { "epoch": 31.09, "eval_loss": 0.14624294638633728, "eval_runtime": 108.1653, "eval_samples_per_second": 117.912, "eval_steps_per_second": 3.689, "eval_wer": 0.19001359196704212, "step": 56000 }, { "epoch": 32.2, "learning_rate": 8.931907407407408e-05, "loss": 0.6349, "step": 58000 }, { "epoch": 32.2, "eval_loss": 0.14455664157867432, "eval_runtime": 109.1496, "eval_samples_per_second": 116.849, "eval_steps_per_second": 3.656, "eval_wer": 0.18883351420834693, "step": 58000 }, { "epoch": 33.31, "learning_rate": 8.894870370370371e-05, "loss": 0.6337, "step": 60000 }, { "epoch": 33.31, "eval_loss": 0.1445704698562622, "eval_runtime": 109.4136, "eval_samples_per_second": 116.567, "eval_steps_per_second": 3.647, "eval_wer": 0.18898102392818383, "step": 60000 }, { "epoch": 34.43, "learning_rate": 8.85787037037037e-05, "loss": 0.6296, "step": 62000 }, { "epoch": 34.43, "eval_loss": 0.14853893220424652, "eval_runtime": 108.9606, "eval_samples_per_second": 117.052, "eval_steps_per_second": 3.662, "eval_wer": 0.18811703842628202, "step": 62000 }, { "epoch": 35.54, "learning_rate": 8.820833333333334e-05, "loss": 0.6249, "step": 64000 }, { "epoch": 35.54, "eval_loss": 0.14442408084869385, "eval_runtime": 110.1343, "eval_samples_per_second": 115.804, "eval_steps_per_second": 3.623, "eval_wer": 0.18499826149258763, "step": 64000 }, { "epoch": 36.65, "learning_rate": 8.783814814814815e-05, "loss": 0.6222, "step": 66000 }, { "epoch": 36.65, "eval_loss": 0.14410334825515747, "eval_runtime": 109.3816, "eval_samples_per_second": 116.601, "eval_steps_per_second": 3.648, "eval_wer": 0.18493504304122896, "step": 66000 }, { "epoch": 37.76, "learning_rate": 8.746796296296297e-05, "loss": 0.6173, "step": 68000 }, { "epoch": 37.76, "eval_loss": 0.14380425214767456, "eval_runtime": 111.8352, "eval_samples_per_second": 114.043, "eval_steps_per_second": 3.568, "eval_wer": 0.18183733892465415, "step": 68000 }, { "epoch": 38.87, "learning_rate": 8.709759259259259e-05, "loss": 0.6149, "step": 70000 }, { "epoch": 38.87, "eval_loss": 0.14080771803855896, "eval_runtime": 110.5146, "eval_samples_per_second": 115.406, "eval_steps_per_second": 3.61, "eval_wer": 0.18231147730984418, "step": 70000 }, { "epoch": 39.98, "learning_rate": 8.672740740740741e-05, "loss": 0.6114, "step": 72000 }, { "epoch": 39.98, "eval_loss": 0.1435374915599823, "eval_runtime": 110.4812, "eval_samples_per_second": 115.44, "eval_steps_per_second": 3.611, "eval_wer": 0.18260649674951795, "step": 72000 }, { "epoch": 41.09, "learning_rate": 8.635722222222222e-05, "loss": 0.6089, "step": 74000 }, { "epoch": 41.09, "eval_loss": 0.14298424124717712, "eval_runtime": 107.7167, "eval_samples_per_second": 118.403, "eval_steps_per_second": 3.704, "eval_wer": 0.18134212772234456, "step": 74000 }, { "epoch": 42.2, "learning_rate": 8.598722222222223e-05, "loss": 0.6066, "step": 76000 }, { "epoch": 42.2, "eval_loss": 0.14788854122161865, "eval_runtime": 108.9238, "eval_samples_per_second": 117.091, "eval_steps_per_second": 3.663, "eval_wer": 0.18113139955114899, "step": 76000 }, { "epoch": 43.31, "learning_rate": 8.561703703703704e-05, "loss": 0.6012, "step": 78000 }, { "epoch": 43.31, "eval_loss": 0.1424262672662735, "eval_runtime": 109.6315, "eval_samples_per_second": 116.335, "eval_steps_per_second": 3.639, "eval_wer": 0.18278561569503418, "step": 78000 }, { "epoch": 44.42, "learning_rate": 8.524666666666667e-05, "loss": 0.6009, "step": 80000 }, { "epoch": 44.42, "eval_loss": 0.1408558338880539, "eval_runtime": 109.6234, "eval_samples_per_second": 116.344, "eval_steps_per_second": 3.64, "eval_wer": 0.17987756693253537, "step": 80000 }, { "epoch": 45.53, "learning_rate": 8.487648148148148e-05, "loss": 0.5976, "step": 82000 }, { "epoch": 45.53, "eval_loss": 0.14050035178661346, "eval_runtime": 112.9556, "eval_samples_per_second": 112.912, "eval_steps_per_second": 3.532, "eval_wer": 0.1795298654500627, "step": 82000 }, { "epoch": 46.64, "learning_rate": 8.450611111111112e-05, "loss": 0.5949, "step": 84000 }, { "epoch": 46.64, "eval_loss": 0.14021989703178406, "eval_runtime": 109.6871, "eval_samples_per_second": 116.276, "eval_steps_per_second": 3.638, "eval_wer": 0.1760317778082163, "step": 84000 }, { "epoch": 47.75, "learning_rate": 8.413592592592593e-05, "loss": 0.5928, "step": 86000 }, { "epoch": 47.75, "eval_loss": 0.14247484505176544, "eval_runtime": 109.7686, "eval_samples_per_second": 116.19, "eval_steps_per_second": 3.635, "eval_wer": 0.1776859939521015, "step": 86000 }, { "epoch": 48.86, "learning_rate": 8.376574074074075e-05, "loss": 0.5886, "step": 88000 }, { "epoch": 48.86, "eval_loss": 0.1418529599905014, "eval_runtime": 110.4954, "eval_samples_per_second": 115.426, "eval_steps_per_second": 3.611, "eval_wer": 0.17910840910767156, "step": 88000 }, { "epoch": 49.97, "learning_rate": 8.339555555555556e-05, "loss": 0.5848, "step": 90000 }, { "epoch": 49.97, "eval_loss": 0.14168688654899597, "eval_runtime": 111.002, "eval_samples_per_second": 114.899, "eval_steps_per_second": 3.595, "eval_wer": 0.1777281395863406, "step": 90000 }, { "epoch": 51.08, "learning_rate": 8.302537037037037e-05, "loss": 0.5824, "step": 92000 }, { "epoch": 51.08, "eval_loss": 0.1420871764421463, "eval_runtime": 113.8377, "eval_samples_per_second": 112.037, "eval_steps_per_second": 3.505, "eval_wer": 0.17755955704938414, "step": 92000 }, { "epoch": 52.19, "learning_rate": 8.2655e-05, "loss": 0.5814, "step": 94000 }, { "epoch": 52.19, "eval_loss": 0.14134614169597626, "eval_runtime": 108.8305, "eval_samples_per_second": 117.191, "eval_steps_per_second": 3.666, "eval_wer": 0.17652698901052588, "step": 94000 }, { "epoch": 53.3, "learning_rate": 8.228481481481481e-05, "loss": 0.5774, "step": 96000 }, { "epoch": 53.3, "eval_loss": 0.1393408626317978, "eval_runtime": 108.8087, "eval_samples_per_second": 117.215, "eval_steps_per_second": 3.667, "eval_wer": 0.1742721975787333, "step": 96000 }, { "epoch": 54.41, "learning_rate": 8.191462962962963e-05, "loss": 0.573, "step": 98000 }, { "epoch": 54.41, "eval_loss": 0.1408652812242508, "eval_runtime": 109.0156, "eval_samples_per_second": 116.992, "eval_steps_per_second": 3.66, "eval_wer": 0.1753574476603905, "step": 98000 }, { "epoch": 55.52, "learning_rate": 8.154444444444444e-05, "loss": 0.573, "step": 100000 }, { "epoch": 55.52, "eval_loss": 0.14187079668045044, "eval_runtime": 110.6508, "eval_samples_per_second": 115.264, "eval_steps_per_second": 3.606, "eval_wer": 0.17601070499109672, "step": 100000 }, { "epoch": 56.64, "learning_rate": 8.117407407407408e-05, "loss": 0.5687, "step": 102000 }, { "epoch": 56.64, "eval_loss": 0.1391436606645584, "eval_runtime": 109.8014, "eval_samples_per_second": 116.155, "eval_steps_per_second": 3.634, "eval_wer": 0.1731658746799566, "step": 102000 }, { "epoch": 57.75, "learning_rate": 8.080388888888889e-05, "loss": 0.5648, "step": 104000 }, { "epoch": 57.75, "eval_loss": 0.13951122760772705, "eval_runtime": 109.5333, "eval_samples_per_second": 116.44, "eval_steps_per_second": 3.643, "eval_wer": 0.1744197072985702, "step": 104000 }, { "epoch": 58.86, "learning_rate": 8.043370370370371e-05, "loss": 0.5623, "step": 106000 }, { "epoch": 58.86, "eval_loss": 0.141478031873703, "eval_runtime": 126.0697, "eval_samples_per_second": 101.166, "eval_steps_per_second": 3.165, "eval_wer": 0.17249154453213078, "step": 106000 }, { "epoch": 59.97, "learning_rate": 8.006333333333333e-05, "loss": 0.5613, "step": 108000 }, { "epoch": 59.97, "eval_loss": 0.140051931142807, "eval_runtime": 110.0609, "eval_samples_per_second": 115.881, "eval_steps_per_second": 3.625, "eval_wer": 0.17356625820522817, "step": 108000 }, { "epoch": 61.08, "learning_rate": 7.969333333333335e-05, "loss": 0.5567, "step": 110000 }, { "epoch": 61.08, "eval_loss": 0.1388338953256607, "eval_runtime": 111.9727, "eval_samples_per_second": 113.903, "eval_steps_per_second": 3.563, "eval_wer": 0.17155380417031052, "step": 110000 }, { "epoch": 62.19, "learning_rate": 7.932296296296296e-05, "loss": 0.5558, "step": 112000 }, { "epoch": 62.19, "eval_loss": 0.1424715220928192, "eval_runtime": 117.7299, "eval_samples_per_second": 108.333, "eval_steps_per_second": 3.389, "eval_wer": 0.1722386707266961, "step": 112000 }, { "epoch": 63.3, "learning_rate": 7.895277777777778e-05, "loss": 0.5515, "step": 114000 }, { "epoch": 63.3, "eval_loss": 0.14163948595523834, "eval_runtime": 110.1956, "eval_samples_per_second": 115.74, "eval_steps_per_second": 3.621, "eval_wer": 0.1728181731974839, "step": 114000 }, { "epoch": 64.41, "learning_rate": 7.858240740740741e-05, "loss": 0.5517, "step": 116000 }, { "epoch": 64.41, "eval_loss": 0.1408870965242386, "eval_runtime": 110.3371, "eval_samples_per_second": 115.591, "eval_steps_per_second": 3.616, "eval_wer": 0.17021568028321865, "step": 116000 }, { "epoch": 65.52, "learning_rate": 7.821222222222223e-05, "loss": 0.5462, "step": 118000 }, { "epoch": 65.52, "eval_loss": 0.14396576583385468, "eval_runtime": 110.3476, "eval_samples_per_second": 115.58, "eval_steps_per_second": 3.616, "eval_wer": 0.1711112750107998, "step": 118000 }, { "epoch": 66.63, "learning_rate": 7.784203703703704e-05, "loss": 0.5468, "step": 120000 }, { "epoch": 66.63, "eval_loss": 0.14202018082141876, "eval_runtime": 110.3323, "eval_samples_per_second": 115.596, "eval_steps_per_second": 3.616, "eval_wer": 0.16947813168403417, "step": 120000 }, { "epoch": 67.74, "learning_rate": 7.747166666666667e-05, "loss": 0.5436, "step": 122000 }, { "epoch": 67.74, "eval_loss": 0.1415812373161316, "eval_runtime": 111.4691, "eval_samples_per_second": 114.417, "eval_steps_per_second": 3.579, "eval_wer": 0.16994173366066442, "step": 122000 }, { "epoch": 68.85, "learning_rate": 7.710166666666667e-05, "loss": 0.5391, "step": 124000 }, { "epoch": 68.85, "eval_loss": 0.1431398242712021, "eval_runtime": 110.7164, "eval_samples_per_second": 115.195, "eval_steps_per_second": 3.604, "eval_wer": 0.1731131926371577, "step": 124000 }, { "epoch": 69.96, "learning_rate": 7.673129629629629e-05, "loss": 0.5386, "step": 126000 }, { "epoch": 69.96, "eval_loss": 0.13997778296470642, "eval_runtime": 115.7532, "eval_samples_per_second": 110.183, "eval_steps_per_second": 3.447, "eval_wer": 0.17011031619762088, "step": 126000 }, { "epoch": 71.07, "learning_rate": 7.636111111111111e-05, "loss": 0.5355, "step": 128000 }, { "epoch": 71.07, "eval_loss": 0.14404296875, "eval_runtime": 108.9746, "eval_samples_per_second": 117.036, "eval_steps_per_second": 3.661, "eval_wer": 0.17013138901474043, "step": 128000 }, { "epoch": 72.18, "learning_rate": 7.599074074074074e-05, "loss": 0.5326, "step": 130000 }, { "epoch": 72.18, "eval_loss": 0.14536485075950623, "eval_runtime": 110.0456, "eval_samples_per_second": 115.897, "eval_steps_per_second": 3.626, "eval_wer": 0.16906721175020284, "step": 130000 }, { "epoch": 73.29, "learning_rate": 7.562074074074074e-05, "loss": 0.5312, "step": 132000 }, { "epoch": 73.29, "eval_loss": 0.14173446595668793, "eval_runtime": 111.1917, "eval_samples_per_second": 114.703, "eval_steps_per_second": 3.588, "eval_wer": 0.1693306219641973, "step": 132000 }, { "epoch": 74.4, "learning_rate": 7.525037037037037e-05, "loss": 0.5274, "step": 134000 }, { "epoch": 74.4, "eval_loss": 0.1436368077993393, "eval_runtime": 109.3262, "eval_samples_per_second": 116.66, "eval_steps_per_second": 3.65, "eval_wer": 0.17104805655944114, "step": 134000 }, { "epoch": 75.51, "learning_rate": 7.488e-05, "loss": 0.5227, "step": 136000 }, { "epoch": 75.51, "eval_loss": 0.1463368982076645, "eval_runtime": 110.3294, "eval_samples_per_second": 115.599, "eval_steps_per_second": 3.616, "eval_wer": 0.17241778967221233, "step": 136000 }, { "epoch": 76.62, "learning_rate": 7.450962962962963e-05, "loss": 0.5245, "step": 138000 }, { "epoch": 76.62, "eval_loss": 0.14312437176704407, "eval_runtime": 109.8942, "eval_samples_per_second": 116.057, "eval_steps_per_second": 3.631, "eval_wer": 0.16633828193322026, "step": 138000 }, { "epoch": 77.73, "learning_rate": 7.413962962962963e-05, "loss": 0.5216, "step": 140000 }, { "epoch": 77.73, "eval_loss": 0.14303582906723022, "eval_runtime": 113.419, "eval_samples_per_second": 112.45, "eval_steps_per_second": 3.518, "eval_wer": 0.1681505442055021, "step": 140000 }, { "epoch": 78.84, "learning_rate": 7.376925925925926e-05, "loss": 0.5163, "step": 142000 }, { "epoch": 78.84, "eval_loss": 0.14348630607128143, "eval_runtime": 111.4429, "eval_samples_per_second": 114.444, "eval_steps_per_second": 3.58, "eval_wer": 0.1669915392639265, "step": 142000 }, { "epoch": 79.96, "learning_rate": 7.339925925925926e-05, "loss": 0.5159, "step": 144000 }, { "epoch": 79.96, "eval_loss": 0.14502613246440887, "eval_runtime": 110.5689, "eval_samples_per_second": 115.349, "eval_steps_per_second": 3.609, "eval_wer": 0.16607487171922578, "step": 144000 }, { "epoch": 81.07, "learning_rate": 7.302888888888889e-05, "loss": 0.5152, "step": 146000 }, { "epoch": 81.07, "eval_loss": 0.14235709607601166, "eval_runtime": 107.4013, "eval_samples_per_second": 118.751, "eval_steps_per_second": 3.715, "eval_wer": 0.16677027468417116, "step": 146000 }, { "epoch": 82.18, "learning_rate": 7.265870370370371e-05, "loss": 0.5114, "step": 148000 }, { "epoch": 82.18, "eval_loss": 0.14669308066368103, "eval_runtime": 108.9248, "eval_samples_per_second": 117.09, "eval_steps_per_second": 3.663, "eval_wer": 0.1700049521120231, "step": 148000 }, { "epoch": 83.29, "learning_rate": 7.228833333333333e-05, "loss": 0.5102, "step": 150000 }, { "epoch": 83.29, "eval_loss": 0.14800167083740234, "eval_runtime": 110.9546, "eval_samples_per_second": 114.948, "eval_steps_per_second": 3.596, "eval_wer": 0.16751835969191542, "step": 150000 }, { "epoch": 84.4, "learning_rate": 7.191833333333333e-05, "loss": 0.5074, "step": 152000 }, { "epoch": 84.4, "eval_loss": 0.14698636531829834, "eval_runtime": 109.1016, "eval_samples_per_second": 116.9, "eval_steps_per_second": 3.657, "eval_wer": 0.16448387402669926, "step": 152000 }, { "epoch": 85.51, "learning_rate": 7.154796296296296e-05, "loss": 0.5048, "step": 154000 }, { "epoch": 85.51, "eval_loss": 0.14937056601047516, "eval_runtime": 120.2598, "eval_samples_per_second": 106.054, "eval_steps_per_second": 3.318, "eval_wer": 0.16732870433783942, "step": 154000 }, { "epoch": 86.62, "learning_rate": 7.117796296296296e-05, "loss": 0.5052, "step": 156000 }, { "epoch": 86.62, "eval_loss": 0.14464369416236877, "eval_runtime": 109.6362, "eval_samples_per_second": 116.33, "eval_steps_per_second": 3.639, "eval_wer": 0.16599058045074755, "step": 156000 }, { "epoch": 87.73, "learning_rate": 7.080759259259259e-05, "loss": 0.5017, "step": 158000 }, { "epoch": 87.73, "eval_loss": 0.14508092403411865, "eval_runtime": 111.0679, "eval_samples_per_second": 114.831, "eval_steps_per_second": 3.592, "eval_wer": 0.1682559082910999, "step": 158000 }, { "epoch": 88.84, "learning_rate": 7.04374074074074e-05, "loss": 0.5009, "step": 160000 }, { "epoch": 88.84, "eval_loss": 0.15082955360412598, "eval_runtime": 117.4553, "eval_samples_per_second": 108.586, "eval_steps_per_second": 3.397, "eval_wer": 0.16449441043525903, "step": 160000 }, { "epoch": 89.95, "learning_rate": 7.006703703703704e-05, "loss": 0.4991, "step": 162000 }, { "epoch": 89.95, "eval_loss": 0.14789538085460663, "eval_runtime": 110.7552, "eval_samples_per_second": 115.155, "eval_steps_per_second": 3.603, "eval_wer": 0.16420992740414503, "step": 162000 }, { "epoch": 91.06, "learning_rate": 6.969666666666666e-05, "loss": 0.4955, "step": 164000 }, { "epoch": 91.06, "eval_loss": 0.15189045667648315, "eval_runtime": 110.2282, "eval_samples_per_second": 115.705, "eval_steps_per_second": 3.62, "eval_wer": 0.1638832987387919, "step": 164000 }, { "epoch": 92.17, "learning_rate": 6.932666666666667e-05, "loss": 0.4954, "step": 166000 }, { "epoch": 92.17, "eval_loss": 0.1472417414188385, "eval_runtime": 112.6524, "eval_samples_per_second": 113.216, "eval_steps_per_second": 3.542, "eval_wer": 0.16437850994110148, "step": 166000 }, { "epoch": 93.28, "learning_rate": 6.895629629629629e-05, "loss": 0.4937, "step": 168000 }, { "epoch": 93.28, "eval_loss": 0.14937187731266022, "eval_runtime": 111.7122, "eval_samples_per_second": 114.168, "eval_steps_per_second": 3.572, "eval_wer": 0.16531625030292174, "step": 168000 }, { "epoch": 94.39, "learning_rate": 6.858611111111111e-05, "loss": 0.4913, "step": 170000 }, { "epoch": 94.39, "eval_loss": 0.14994442462921143, "eval_runtime": 111.1265, "eval_samples_per_second": 114.77, "eval_steps_per_second": 3.591, "eval_wer": 0.16276643943145538, "step": 170000 }, { "epoch": 95.5, "learning_rate": 6.821574074074074e-05, "loss": 0.4877, "step": 172000 }, { "epoch": 95.5, "eval_loss": 0.15009111166000366, "eval_runtime": 110.8815, "eval_samples_per_second": 115.024, "eval_steps_per_second": 3.598, "eval_wer": 0.16477889346637306, "step": 172000 }, { "epoch": 96.61, "learning_rate": 6.784555555555556e-05, "loss": 0.4855, "step": 174000 }, { "epoch": 96.61, "eval_loss": 0.15036821365356445, "eval_runtime": 123.2578, "eval_samples_per_second": 103.474, "eval_steps_per_second": 3.237, "eval_wer": 0.16344076957928122, "step": 174000 }, { "epoch": 97.72, "learning_rate": 6.747537037037037e-05, "loss": 0.4865, "step": 176000 }, { "epoch": 97.72, "eval_loss": 0.15151378512382507, "eval_runtime": 110.1192, "eval_samples_per_second": 115.82, "eval_steps_per_second": 3.623, "eval_wer": 0.16542161438851952, "step": 176000 }, { "epoch": 98.83, "learning_rate": 6.7105e-05, "loss": 0.4804, "step": 178000 }, { "epoch": 98.83, "eval_loss": 0.1545591503381729, "eval_runtime": 109.0975, "eval_samples_per_second": 116.905, "eval_steps_per_second": 3.657, "eval_wer": 0.16173387139259712, "step": 178000 }, { "epoch": 99.94, "learning_rate": 6.6735e-05, "loss": 0.4804, "step": 180000 }, { "epoch": 99.94, "eval_loss": 0.14692111313343048, "eval_runtime": 110.3968, "eval_samples_per_second": 115.529, "eval_steps_per_second": 3.614, "eval_wer": 0.16394651719015058, "step": 180000 }, { "epoch": 101.05, "learning_rate": 6.636481481481481e-05, "loss": 0.4784, "step": 182000 }, { "epoch": 101.05, "eval_loss": 0.15785107016563416, "eval_runtime": 119.3969, "eval_samples_per_second": 106.82, "eval_steps_per_second": 3.342, "eval_wer": 0.16587467995659, "step": 182000 }, { "epoch": 102.17, "learning_rate": 6.599462962962963e-05, "loss": 0.4754, "step": 184000 }, { "epoch": 102.17, "eval_loss": 0.1541442722082138, "eval_runtime": 110.3894, "eval_samples_per_second": 115.536, "eval_steps_per_second": 3.614, "eval_wer": 0.16502123086324796, "step": 184000 }, { "epoch": 103.28, "learning_rate": 6.562425925925926e-05, "loss": 0.4736, "step": 186000 }, { "epoch": 103.28, "eval_loss": 0.15858229994773865, "eval_runtime": 111.9719, "eval_samples_per_second": 113.904, "eval_steps_per_second": 3.563, "eval_wer": 0.16279804865713474, "step": 186000 }, { "epoch": 104.39, "learning_rate": 6.525407407407407e-05, "loss": 0.4733, "step": 188000 }, { "epoch": 104.39, "eval_loss": 0.15577121078968048, "eval_runtime": 111.8331, "eval_samples_per_second": 114.045, "eval_steps_per_second": 3.568, "eval_wer": 0.16437850994110148, "step": 188000 }, { "epoch": 105.5, "learning_rate": 6.48837037037037e-05, "loss": 0.4729, "step": 190000 }, { "epoch": 105.5, "eval_loss": 0.15798160433769226, "eval_runtime": 110.7044, "eval_samples_per_second": 115.208, "eval_steps_per_second": 3.604, "eval_wer": 0.16497908522900884, "step": 190000 }, { "epoch": 106.61, "learning_rate": 6.451351851851852e-05, "loss": 0.4708, "step": 192000 }, { "epoch": 106.61, "eval_loss": 0.1557486653327942, "eval_runtime": 112.6241, "eval_samples_per_second": 113.244, "eval_steps_per_second": 3.543, "eval_wer": 0.16445226480101993, "step": 192000 }, { "epoch": 107.72, "learning_rate": 6.414314814814814e-05, "loss": 0.4674, "step": 194000 }, { "epoch": 107.72, "eval_loss": 0.15497049689292908, "eval_runtime": 111.8369, "eval_samples_per_second": 114.041, "eval_steps_per_second": 3.568, "eval_wer": 0.1644101191667808, "step": 194000 }, { "epoch": 108.83, "learning_rate": 6.377314814814816e-05, "loss": 0.4645, "step": 196000 }, { "epoch": 108.83, "eval_loss": 0.15774163603782654, "eval_runtime": 115.5016, "eval_samples_per_second": 110.423, "eval_steps_per_second": 3.454, "eval_wer": 0.16043789313974438, "step": 196000 }, { "epoch": 109.94, "learning_rate": 6.340277777777777e-05, "loss": 0.4652, "step": 198000 }, { "epoch": 109.94, "eval_loss": 0.1597561538219452, "eval_runtime": 112.242, "eval_samples_per_second": 113.63, "eval_steps_per_second": 3.555, "eval_wer": 0.16909882097588216, "step": 198000 }, { "epoch": 111.05, "learning_rate": 6.303240740740741e-05, "loss": 0.4636, "step": 200000 }, { "epoch": 111.05, "eval_loss": 0.15712451934814453, "eval_runtime": 116.0757, "eval_samples_per_second": 109.877, "eval_steps_per_second": 3.437, "eval_wer": 0.16316682295672696, "step": 200000 }, { "epoch": 112.16, "learning_rate": 6.266222222222222e-05, "loss": 0.4615, "step": 202000 }, { "epoch": 112.16, "eval_loss": 0.16025520861148834, "eval_runtime": 109.4143, "eval_samples_per_second": 116.566, "eval_steps_per_second": 3.647, "eval_wer": 0.15968980813200012, "step": 202000 }, { "epoch": 113.27, "learning_rate": 6.229203703703704e-05, "loss": 0.4586, "step": 204000 }, { "epoch": 113.27, "eval_loss": 0.16139160096645355, "eval_runtime": 110.5104, "eval_samples_per_second": 115.41, "eval_steps_per_second": 3.611, "eval_wer": 0.1633986239450421, "step": 204000 }, { "epoch": 114.38, "learning_rate": 6.192185185185185e-05, "loss": 0.4546, "step": 206000 }, { "epoch": 114.38, "eval_loss": 0.15872280299663544, "eval_runtime": 111.2156, "eval_samples_per_second": 114.678, "eval_steps_per_second": 3.588, "eval_wer": 0.1650001580461284, "step": 206000 }, { "epoch": 115.49, "learning_rate": 6.155166666666668e-05, "loss": 0.4554, "step": 208000 }, { "epoch": 115.49, "eval_loss": 0.1584981381893158, "eval_runtime": 109.0033, "eval_samples_per_second": 117.006, "eval_steps_per_second": 3.66, "eval_wer": 0.16186030829531445, "step": 208000 }, { "epoch": 116.6, "learning_rate": 6.118148148148148e-05, "loss": 0.4534, "step": 210000 }, { "epoch": 116.6, "eval_loss": 0.16507840156555176, "eval_runtime": 111.2078, "eval_samples_per_second": 114.686, "eval_steps_per_second": 3.588, "eval_wer": 0.16058540285958128, "step": 210000 }, { "epoch": 117.71, "learning_rate": 6.081111111111112e-05, "loss": 0.452, "step": 212000 }, { "epoch": 117.71, "eval_loss": 0.16277721524238586, "eval_runtime": 110.743, "eval_samples_per_second": 115.168, "eval_steps_per_second": 3.603, "eval_wer": 0.16030091982846728, "step": 212000 }, { "epoch": 118.82, "learning_rate": 6.044092592592593e-05, "loss": 0.4498, "step": 214000 }, { "epoch": 118.82, "eval_loss": 0.1640331745147705, "eval_runtime": 111.5522, "eval_samples_per_second": 114.332, "eval_steps_per_second": 3.577, "eval_wer": 0.1609752499762931, "step": 214000 }, { "epoch": 119.93, "learning_rate": 6.0070555555555565e-05, "loss": 0.4475, "step": 216000 }, { "epoch": 119.93, "eval_loss": 0.16334258019924164, "eval_runtime": 110.4588, "eval_samples_per_second": 115.464, "eval_steps_per_second": 3.612, "eval_wer": 0.16188138111243403, "step": 216000 }, { "epoch": 121.04, "learning_rate": 5.9700370370370374e-05, "loss": 0.4456, "step": 218000 }, { "epoch": 121.04, "eval_loss": 0.1660892814397812, "eval_runtime": 109.1417, "eval_samples_per_second": 116.857, "eval_steps_per_second": 3.656, "eval_wer": 0.15847812114762563, "step": 218000 }, { "epoch": 122.15, "learning_rate": 5.933018518518518e-05, "loss": 0.4453, "step": 220000 }, { "epoch": 122.15, "eval_loss": 0.16312651336193085, "eval_runtime": 109.8128, "eval_samples_per_second": 116.143, "eval_steps_per_second": 3.633, "eval_wer": 0.15943693432656544, "step": 220000 }, { "epoch": 123.26, "learning_rate": 5.8960000000000005e-05, "loss": 0.443, "step": 222000 }, { "epoch": 123.26, "eval_loss": 0.16088205575942993, "eval_runtime": 110.2399, "eval_samples_per_second": 115.693, "eval_steps_per_second": 3.619, "eval_wer": 0.15721375212045222, "step": 222000 }, { "epoch": 124.38, "learning_rate": 5.8589814814814813e-05, "loss": 0.4437, "step": 224000 }, { "epoch": 124.38, "eval_loss": 0.1613088697195053, "eval_runtime": 111.1211, "eval_samples_per_second": 114.776, "eval_steps_per_second": 3.591, "eval_wer": 0.15813041966515293, "step": 224000 }, { "epoch": 125.49, "learning_rate": 5.821944444444445e-05, "loss": 0.4391, "step": 226000 }, { "epoch": 125.49, "eval_loss": 0.17190876603126526, "eval_runtime": 125.7786, "eval_samples_per_second": 101.4, "eval_steps_per_second": 3.172, "eval_wer": 0.1587309949530603, "step": 226000 }, { "epoch": 126.6, "learning_rate": 5.784925925925926e-05, "loss": 0.4372, "step": 228000 }, { "epoch": 126.6, "eval_loss": 0.16242261230945587, "eval_runtime": 111.445, "eval_samples_per_second": 114.442, "eval_steps_per_second": 3.58, "eval_wer": 0.15744555310876734, "step": 228000 }, { "epoch": 127.71, "learning_rate": 5.747907407407408e-05, "loss": 0.4377, "step": 230000 }, { "epoch": 127.71, "eval_loss": 0.16486689448356628, "eval_runtime": 111.8011, "eval_samples_per_second": 114.078, "eval_steps_per_second": 3.569, "eval_wer": 0.15686605063797954, "step": 230000 }, { "epoch": 128.82, "learning_rate": 5.71087037037037e-05, "loss": 0.4349, "step": 232000 }, { "epoch": 128.82, "eval_loss": 0.1673368662595749, "eval_runtime": 112.5569, "eval_samples_per_second": 113.312, "eval_steps_per_second": 3.545, "eval_wer": 0.15947907996080457, "step": 232000 }, { "epoch": 129.93, "learning_rate": 5.6738518518518525e-05, "loss": 0.4342, "step": 234000 }, { "epoch": 129.93, "eval_loss": 0.16691875457763672, "eval_runtime": 118.5844, "eval_samples_per_second": 107.552, "eval_steps_per_second": 3.365, "eval_wer": 0.15731911620605002, "step": 234000 }, { "epoch": 131.04, "learning_rate": 5.6368333333333334e-05, "loss": 0.4335, "step": 236000 }, { "epoch": 131.04, "eval_loss": 0.17210978269577026, "eval_runtime": 110.1221, "eval_samples_per_second": 115.817, "eval_steps_per_second": 3.623, "eval_wer": 0.15840436628770718, "step": 236000 }, { "epoch": 132.15, "learning_rate": 5.599796296296297e-05, "loss": 0.4302, "step": 238000 }, { "epoch": 132.15, "eval_loss": 0.17578274011611938, "eval_runtime": 110.8686, "eval_samples_per_second": 115.037, "eval_steps_per_second": 3.599, "eval_wer": 0.1587309949530603, "step": 238000 }, { "epoch": 133.26, "learning_rate": 5.562777777777778e-05, "loss": 0.4292, "step": 240000 }, { "epoch": 133.26, "eval_loss": 0.173477903008461, "eval_runtime": 110.0323, "eval_samples_per_second": 115.911, "eval_steps_per_second": 3.626, "eval_wer": 0.16007965524871193, "step": 240000 }, { "epoch": 134.37, "learning_rate": 5.5257407407407414e-05, "loss": 0.4269, "step": 242000 }, { "epoch": 134.37, "eval_loss": 0.16973139345645905, "eval_runtime": 113.6422, "eval_samples_per_second": 112.229, "eval_steps_per_second": 3.511, "eval_wer": 0.1569819511321371, "step": 242000 }, { "epoch": 135.48, "learning_rate": 5.488722222222222e-05, "loss": 0.4273, "step": 244000 }, { "epoch": 135.48, "eval_loss": 0.17380733788013458, "eval_runtime": 110.4957, "eval_samples_per_second": 115.425, "eval_steps_per_second": 3.611, "eval_wer": 0.15647620352126773, "step": 244000 }, { "epoch": 136.59, "learning_rate": 5.4517037037037045e-05, "loss": 0.4257, "step": 246000 }, { "epoch": 136.59, "eval_loss": 0.16813333332538605, "eval_runtime": 111.9779, "eval_samples_per_second": 113.897, "eval_steps_per_second": 3.563, "eval_wer": 0.15652888556406663, "step": 246000 }, { "epoch": 137.7, "learning_rate": 5.414666666666667e-05, "loss": 0.4221, "step": 248000 }, { "epoch": 137.7, "eval_loss": 0.17320315539836884, "eval_runtime": 111.5731, "eval_samples_per_second": 114.311, "eval_steps_per_second": 3.576, "eval_wer": 0.15928942460672854, "step": 248000 }, { "epoch": 138.81, "learning_rate": 5.377648148148149e-05, "loss": 0.422, "step": 250000 }, { "epoch": 138.81, "eval_loss": 0.17005948722362518, "eval_runtime": 116.6285, "eval_samples_per_second": 109.356, "eval_steps_per_second": 3.421, "eval_wer": 0.15444267666923053, "step": 250000 }, { "epoch": 139.92, "learning_rate": 5.34062962962963e-05, "loss": 0.4178, "step": 252000 }, { "epoch": 139.92, "eval_loss": 0.17322979867458344, "eval_runtime": 117.6422, "eval_samples_per_second": 108.413, "eval_steps_per_second": 3.392, "eval_wer": 0.15422141208947518, "step": 252000 }, { "epoch": 141.03, "learning_rate": 5.303611111111111e-05, "loss": 0.4182, "step": 254000 }, { "epoch": 141.03, "eval_loss": 0.1738407015800476, "eval_runtime": 113.7677, "eval_samples_per_second": 112.106, "eval_steps_per_second": 3.507, "eval_wer": 0.15336796299613314, "step": 254000 }, { "epoch": 142.14, "learning_rate": 5.266592592592593e-05, "loss": 0.4166, "step": 256000 }, { "epoch": 142.14, "eval_loss": 0.1734342724084854, "eval_runtime": 112.0095, "eval_samples_per_second": 113.865, "eval_steps_per_second": 3.562, "eval_wer": 0.15277792411678556, "step": 256000 }, { "epoch": 143.25, "learning_rate": 5.229574074074074e-05, "loss": 0.4163, "step": 258000 }, { "epoch": 143.25, "eval_loss": 0.1752772331237793, "eval_runtime": 111.2622, "eval_samples_per_second": 114.63, "eval_steps_per_second": 3.586, "eval_wer": 0.15545417189096924, "step": 258000 }, { "epoch": 144.36, "learning_rate": 5.1925370370370374e-05, "loss": 0.4144, "step": 260000 }, { "epoch": 144.36, "eval_loss": 0.17757321894168854, "eval_runtime": 114.1149, "eval_samples_per_second": 111.765, "eval_steps_per_second": 3.496, "eval_wer": 0.15680283218662086, "step": 260000 }, { "epoch": 145.47, "learning_rate": 5.1555e-05, "loss": 0.4103, "step": 262000 }, { "epoch": 145.47, "eval_loss": 0.17166772484779358, "eval_runtime": 111.5777, "eval_samples_per_second": 114.306, "eval_steps_per_second": 3.576, "eval_wer": 0.15237754059151398, "step": 262000 }, { "epoch": 146.59, "learning_rate": 5.1185000000000005e-05, "loss": 0.4117, "step": 264000 }, { "epoch": 146.59, "eval_loss": 0.1743527501821518, "eval_runtime": 112.423, "eval_samples_per_second": 113.447, "eval_steps_per_second": 3.549, "eval_wer": 0.15553846315944747, "step": 264000 }, { "epoch": 147.7, "learning_rate": 5.081462962962963e-05, "loss": 0.4085, "step": 266000 }, { "epoch": 147.7, "eval_loss": 0.17306753993034363, "eval_runtime": 112.3063, "eval_samples_per_second": 113.564, "eval_steps_per_second": 3.553, "eval_wer": 0.15564382724504525, "step": 266000 }, { "epoch": 148.81, "learning_rate": 5.0444259259259264e-05, "loss": 0.408, "step": 268000 }, { "epoch": 148.81, "eval_loss": 0.17164543271064758, "eval_runtime": 113.1307, "eval_samples_per_second": 112.737, "eval_steps_per_second": 3.527, "eval_wer": 0.1530940163735789, "step": 268000 }, { "epoch": 149.92, "learning_rate": 5.0073888888888886e-05, "loss": 0.4078, "step": 270000 }, { "epoch": 149.92, "eval_loss": 0.17812077701091766, "eval_runtime": 114.4521, "eval_samples_per_second": 111.435, "eval_steps_per_second": 3.486, "eval_wer": 0.15219842164599776, "step": 270000 }, { "epoch": 151.03, "learning_rate": 4.970425925925926e-05, "loss": 0.4051, "step": 272000 }, { "epoch": 151.03, "eval_loss": 0.17345324158668518, "eval_runtime": 109.0896, "eval_samples_per_second": 116.913, "eval_steps_per_second": 3.658, "eval_wer": 0.15237754059151398, "step": 272000 }, { "epoch": 152.14, "learning_rate": 4.933388888888889e-05, "loss": 0.4033, "step": 274000 }, { "epoch": 152.14, "eval_loss": 0.18191710114479065, "eval_runtime": 111.0027, "eval_samples_per_second": 114.898, "eval_steps_per_second": 3.595, "eval_wer": 0.15185072016352505, "step": 274000 }, { "epoch": 153.25, "learning_rate": 4.896351851851852e-05, "loss": 0.4037, "step": 276000 }, { "epoch": 153.25, "eval_loss": 0.18181933462619781, "eval_runtime": 110.4212, "eval_samples_per_second": 115.503, "eval_steps_per_second": 3.613, "eval_wer": 0.15307294355645934, "step": 276000 }, { "epoch": 154.36, "learning_rate": 4.8593333333333335e-05, "loss": 0.4002, "step": 278000 }, { "epoch": 154.36, "eval_loss": 0.17859378457069397, "eval_runtime": 116.538, "eval_samples_per_second": 109.441, "eval_steps_per_second": 3.424, "eval_wer": 0.1548852058287412, "step": 278000 }, { "epoch": 155.47, "learning_rate": 4.8222962962962964e-05, "loss": 0.4021, "step": 280000 }, { "epoch": 155.47, "eval_loss": 0.17952340841293335, "eval_runtime": 112.6278, "eval_samples_per_second": 113.24, "eval_steps_per_second": 3.543, "eval_wer": 0.15223003087167708, "step": 280000 }, { "epoch": 156.58, "learning_rate": 4.785277777777778e-05, "loss": 0.3985, "step": 282000 }, { "epoch": 156.58, "eval_loss": 0.18241500854492188, "eval_runtime": 110.8984, "eval_samples_per_second": 115.006, "eval_steps_per_second": 3.598, "eval_wer": 0.1529570430623018, "step": 282000 }, { "epoch": 157.69, "learning_rate": 4.7482592592592595e-05, "loss": 0.3987, "step": 284000 }, { "epoch": 157.69, "eval_loss": 0.18239329755306244, "eval_runtime": 116.2871, "eval_samples_per_second": 109.677, "eval_steps_per_second": 3.431, "eval_wer": 0.15229324932303576, "step": 284000 }, { "epoch": 158.8, "learning_rate": 4.7112222222222224e-05, "loss": 0.3957, "step": 286000 }, { "epoch": 158.8, "eval_loss": 0.18385788798332214, "eval_runtime": 111.0629, "eval_samples_per_second": 114.836, "eval_steps_per_second": 3.593, "eval_wer": 0.15160838276665015, "step": 286000 } ], "max_steps": 540300, "num_train_epochs": 300, "total_flos": 2.3864414600426045e+21, "trial_name": null, "trial_params": null }