{ "best_metric": null, "best_model_checkpoint": null, "epoch": 35.73957158962796, "global_step": 39600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "learning_rate": 0.0002388, "loss": 5.1061, "step": 400 }, { "epoch": 0.36, "eval_loss": 3.2615509033203125, "eval_runtime": 209.4571, "eval_samples_per_second": 18.892, "eval_steps_per_second": 2.363, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.72, "learning_rate": 0.0002990989719814553, "loss": 1.8298, "step": 800 }, { "epoch": 0.72, "eval_loss": 0.5897451639175415, "eval_runtime": 208.9676, "eval_samples_per_second": 18.936, "eval_steps_per_second": 2.369, "eval_wer": 0.7382658232272045, "step": 800 }, { "epoch": 1.08, "learning_rate": 0.0002978895383995162, "loss": 0.7312, "step": 1200 }, { "epoch": 1.08, "eval_loss": 0.38931864500045776, "eval_runtime": 205.7748, "eval_samples_per_second": 19.23, "eval_steps_per_second": 2.406, "eval_wer": 0.5565651976374375, "step": 1200 }, { "epoch": 1.44, "learning_rate": 0.0002966801048175771, "loss": 0.5859, "step": 1600 }, { "epoch": 1.44, "eval_loss": 0.3028641939163208, "eval_runtime": 210.4962, "eval_samples_per_second": 18.798, "eval_steps_per_second": 2.352, "eval_wer": 0.43102785447174363, "step": 1600 }, { "epoch": 1.8, "learning_rate": 0.00029547067123563797, "loss": 0.5272, "step": 2000 }, { "epoch": 1.8, "eval_loss": 0.2646712362766266, "eval_runtime": 208.1951, "eval_samples_per_second": 19.006, "eval_steps_per_second": 2.378, "eval_wer": 0.3874462656834306, "step": 2000 }, { "epoch": 2.17, "learning_rate": 0.00029426123765369884, "loss": 0.4691, "step": 2400 }, { "epoch": 2.17, "eval_loss": 0.25143691897392273, "eval_runtime": 208.3951, "eval_samples_per_second": 18.988, "eval_steps_per_second": 2.375, "eval_wer": 0.3728025722573655, "step": 2400 }, { "epoch": 2.53, "learning_rate": 0.0002930518040717597, "loss": 0.4313, "step": 2800 }, { "epoch": 2.53, "eval_loss": 0.23935846984386444, "eval_runtime": 208.5439, "eval_samples_per_second": 18.974, "eval_steps_per_second": 2.374, "eval_wer": 0.33872715199384895, "step": 2800 }, { "epoch": 2.89, "learning_rate": 0.0002918423704898206, "loss": 0.4157, "step": 3200 }, { "epoch": 2.89, "eval_loss": 0.22154289484024048, "eval_runtime": 207.5705, "eval_samples_per_second": 19.063, "eval_steps_per_second": 2.385, "eval_wer": 0.3165344423863279, "step": 3200 }, { "epoch": 3.25, "learning_rate": 0.00029063293690788144, "loss": 0.382, "step": 3600 }, { "epoch": 3.25, "eval_loss": 0.22716625034809113, "eval_runtime": 208.4179, "eval_samples_per_second": 18.986, "eval_steps_per_second": 2.375, "eval_wer": 0.310103798972495, "step": 3600 }, { "epoch": 3.61, "learning_rate": 0.0002894235033259423, "loss": 0.3636, "step": 4000 }, { "epoch": 3.61, "eval_loss": 0.21131443977355957, "eval_runtime": 206.1846, "eval_samples_per_second": 19.192, "eval_steps_per_second": 2.401, "eval_wer": 0.30538566385908505, "step": 4000 }, { "epoch": 3.97, "learning_rate": 0.00028821406974400323, "loss": 0.3665, "step": 4400 }, { "epoch": 3.97, "eval_loss": 0.21285590529441833, "eval_runtime": 205.9986, "eval_samples_per_second": 19.209, "eval_steps_per_second": 2.403, "eval_wer": 0.2907769195820082, "step": 4400 }, { "epoch": 4.33, "learning_rate": 0.00028700463616206405, "loss": 0.333, "step": 4800 }, { "epoch": 4.33, "eval_loss": 0.2077477127313614, "eval_runtime": 208.1768, "eval_samples_per_second": 19.008, "eval_steps_per_second": 2.378, "eval_wer": 0.2788243106280362, "step": 4800 }, { "epoch": 4.69, "learning_rate": 0.00028579520258012497, "loss": 0.3313, "step": 5200 }, { "epoch": 4.69, "eval_loss": 0.18993255496025085, "eval_runtime": 205.0681, "eval_samples_per_second": 19.296, "eval_steps_per_second": 2.414, "eval_wer": 0.2659979729493587, "step": 5200 }, { "epoch": 5.05, "learning_rate": 0.00028458576899818584, "loss": 0.3222, "step": 5600 }, { "epoch": 5.05, "eval_loss": 0.19064170122146606, "eval_runtime": 205.7167, "eval_samples_per_second": 19.235, "eval_steps_per_second": 2.406, "eval_wer": 0.2542201097403278, "step": 5600 }, { "epoch": 5.41, "learning_rate": 0.0002833763354162467, "loss": 0.3003, "step": 6000 }, { "epoch": 5.41, "eval_loss": 0.1921728551387787, "eval_runtime": 204.1327, "eval_samples_per_second": 19.384, "eval_steps_per_second": 2.425, "eval_wer": 0.2612099395379722, "step": 6000 }, { "epoch": 5.78, "learning_rate": 0.00028216690183430757, "loss": 0.3057, "step": 6400 }, { "epoch": 5.78, "eval_loss": 0.19207137823104858, "eval_runtime": 205.293, "eval_samples_per_second": 19.275, "eval_steps_per_second": 2.411, "eval_wer": 0.2560724146367036, "step": 6400 }, { "epoch": 6.14, "learning_rate": 0.0002809604918363233, "loss": 0.297, "step": 6800 }, { "epoch": 6.14, "eval_loss": 0.1768147200345993, "eval_runtime": 204.3464, "eval_samples_per_second": 19.364, "eval_steps_per_second": 2.422, "eval_wer": 0.24146367035962674, "step": 6800 }, { "epoch": 6.5, "learning_rate": 0.0002797510582543842, "loss": 0.2834, "step": 7200 }, { "epoch": 6.5, "eval_loss": 0.18651635944843292, "eval_runtime": 205.3683, "eval_samples_per_second": 19.268, "eval_steps_per_second": 2.41, "eval_wer": 0.24894278824310628, "step": 7200 }, { "epoch": 6.86, "learning_rate": 0.00027854162467244505, "loss": 0.2787, "step": 7600 }, { "epoch": 6.86, "eval_loss": 0.18522889912128448, "eval_runtime": 204.2237, "eval_samples_per_second": 19.376, "eval_steps_per_second": 2.424, "eval_wer": 0.24726522909167162, "step": 7600 }, { "epoch": 7.22, "learning_rate": 0.0002773321910905059, "loss": 0.2678, "step": 8000 }, { "epoch": 7.22, "eval_loss": 0.1824478805065155, "eval_runtime": 203.4819, "eval_samples_per_second": 19.446, "eval_steps_per_second": 2.433, "eval_wer": 0.23087407821619543, "step": 8000 }, { "epoch": 7.58, "learning_rate": 0.0002761227575085668, "loss": 0.2665, "step": 8400 }, { "epoch": 7.58, "eval_loss": 0.17768050730228424, "eval_runtime": 203.7918, "eval_samples_per_second": 19.417, "eval_steps_per_second": 2.429, "eval_wer": 0.2309789256631601, "step": 8400 }, { "epoch": 7.94, "learning_rate": 0.00027491332392662766, "loss": 0.2722, "step": 8800 }, { "epoch": 7.94, "eval_loss": 0.17655304074287415, "eval_runtime": 203.4054, "eval_samples_per_second": 19.454, "eval_steps_per_second": 2.434, "eval_wer": 0.23216719672875966, "step": 8800 }, { "epoch": 8.3, "learning_rate": 0.0002737038903446885, "loss": 0.2549, "step": 9200 }, { "epoch": 8.3, "eval_loss": 0.18208913505077362, "eval_runtime": 203.5406, "eval_samples_per_second": 19.441, "eval_steps_per_second": 2.432, "eval_wer": 0.23590675567049943, "step": 9200 }, { "epoch": 8.66, "learning_rate": 0.00027249748034670427, "loss": 0.2535, "step": 9600 }, { "epoch": 8.66, "eval_loss": 0.17272169888019562, "eval_runtime": 204.5912, "eval_samples_per_second": 19.341, "eval_steps_per_second": 2.419, "eval_wer": 0.22619089225177366, "step": 9600 }, { "epoch": 9.03, "learning_rate": 0.00027128804676476514, "loss": 0.2508, "step": 10000 }, { "epoch": 9.03, "eval_loss": 0.17887552082538605, "eval_runtime": 202.6986, "eval_samples_per_second": 19.522, "eval_steps_per_second": 2.442, "eval_wer": 0.22301051969384544, "step": 10000 }, { "epoch": 9.39, "learning_rate": 0.000270078613182826, "loss": 0.2354, "step": 10400 }, { "epoch": 9.39, "eval_loss": 0.17052198946475983, "eval_runtime": 202.2989, "eval_samples_per_second": 19.56, "eval_steps_per_second": 2.447, "eval_wer": 0.2227658756509279, "step": 10400 }, { "epoch": 9.75, "learning_rate": 0.0002688691796008869, "loss": 0.2449, "step": 10800 }, { "epoch": 9.75, "eval_loss": 0.17250895500183105, "eval_runtime": 202.4538, "eval_samples_per_second": 19.545, "eval_steps_per_second": 2.445, "eval_wer": 0.2203893335197288, "step": 10800 }, { "epoch": 10.11, "learning_rate": 0.0002676597460189478, "loss": 0.2461, "step": 11200 }, { "epoch": 10.11, "eval_loss": 0.18136142194271088, "eval_runtime": 202.9746, "eval_samples_per_second": 19.495, "eval_steps_per_second": 2.439, "eval_wer": 0.23083912906720722, "step": 11200 }, { "epoch": 10.47, "learning_rate": 0.0002664533360209635, "loss": 0.2354, "step": 11600 }, { "epoch": 10.47, "eval_loss": 0.17660640180110931, "eval_runtime": 202.5754, "eval_samples_per_second": 19.533, "eval_steps_per_second": 2.444, "eval_wer": 0.21902631670918812, "step": 11600 }, { "epoch": 10.83, "learning_rate": 0.00026524390243902435, "loss": 0.2271, "step": 12000 }, { "epoch": 10.83, "eval_loss": 0.16712866723537445, "eval_runtime": 204.6252, "eval_samples_per_second": 19.338, "eval_steps_per_second": 2.419, "eval_wer": 0.22643553629469124, "step": 12000 }, { "epoch": 11.19, "learning_rate": 0.0002640344688570853, "loss": 0.2262, "step": 12400 }, { "epoch": 11.19, "eval_loss": 0.16640537977218628, "eval_runtime": 205.9457, "eval_samples_per_second": 19.214, "eval_steps_per_second": 2.404, "eval_wer": 0.2237444518225981, "step": 12400 }, { "epoch": 11.55, "learning_rate": 0.0002628250352751461, "loss": 0.222, "step": 12800 }, { "epoch": 11.55, "eval_loss": 0.16319511830806732, "eval_runtime": 203.9378, "eval_samples_per_second": 19.403, "eval_steps_per_second": 2.427, "eval_wer": 0.20773774158599237, "step": 12800 }, { "epoch": 11.91, "learning_rate": 0.000261615601693207, "loss": 0.2254, "step": 13200 }, { "epoch": 11.91, "eval_loss": 0.16412602365016937, "eval_runtime": 203.9337, "eval_samples_per_second": 19.403, "eval_steps_per_second": 2.427, "eval_wer": 0.21906126585817634, "step": 13200 }, { "epoch": 12.27, "learning_rate": 0.0002604061681112679, "loss": 0.2198, "step": 13600 }, { "epoch": 12.27, "eval_loss": 0.16197219491004944, "eval_runtime": 202.2546, "eval_samples_per_second": 19.564, "eval_steps_per_second": 2.447, "eval_wer": 0.2122112326564848, "step": 13600 }, { "epoch": 12.63, "learning_rate": 0.00025919673452932875, "loss": 0.2167, "step": 14000 }, { "epoch": 12.63, "eval_loss": 0.15757694840431213, "eval_runtime": 202.2278, "eval_samples_per_second": 19.567, "eval_steps_per_second": 2.448, "eval_wer": 0.2082270296718275, "step": 14000 }, { "epoch": 13.0, "learning_rate": 0.0002579903245313445, "loss": 0.2138, "step": 14400 }, { "epoch": 13.0, "eval_loss": 0.16401419043540955, "eval_runtime": 202.7059, "eval_samples_per_second": 19.521, "eval_steps_per_second": 2.442, "eval_wer": 0.2156711984063188, "step": 14400 }, { "epoch": 13.36, "learning_rate": 0.0002567808909494053, "loss": 0.2112, "step": 14800 }, { "epoch": 13.36, "eval_loss": 0.17054887115955353, "eval_runtime": 203.4699, "eval_samples_per_second": 19.448, "eval_steps_per_second": 2.433, "eval_wer": 0.21696431691888302, "step": 14800 }, { "epoch": 13.72, "learning_rate": 0.00025557145736746623, "loss": 0.2038, "step": 15200 }, { "epoch": 13.72, "eval_loss": 0.1620979905128479, "eval_runtime": 203.3439, "eval_samples_per_second": 19.46, "eval_steps_per_second": 2.434, "eval_wer": 0.2075979449900395, "step": 15200 }, { "epoch": 14.08, "learning_rate": 0.0002543620237855271, "loss": 0.2101, "step": 15600 }, { "epoch": 14.08, "eval_loss": 0.16726231575012207, "eval_runtime": 203.5727, "eval_samples_per_second": 19.438, "eval_steps_per_second": 2.432, "eval_wer": 0.19962953902072483, "step": 15600 }, { "epoch": 14.44, "learning_rate": 0.00025315259020358797, "loss": 0.2013, "step": 16000 }, { "epoch": 14.44, "eval_loss": 0.16415779292583466, "eval_runtime": 202.8281, "eval_samples_per_second": 19.509, "eval_steps_per_second": 2.44, "eval_wer": 0.20885611435361548, "step": 16000 }, { "epoch": 14.8, "learning_rate": 0.00025194315662164883, "loss": 0.21, "step": 16400 }, { "epoch": 14.8, "eval_loss": 0.1616578996181488, "eval_runtime": 203.1464, "eval_samples_per_second": 19.479, "eval_steps_per_second": 2.437, "eval_wer": 0.2186768252193059, "step": 16400 }, { "epoch": 15.16, "learning_rate": 0.0002507337230397097, "loss": 0.2002, "step": 16800 }, { "epoch": 15.16, "eval_loss": 0.1627209484577179, "eval_runtime": 202.7336, "eval_samples_per_second": 19.518, "eval_steps_per_second": 2.442, "eval_wer": 0.21839723202740013, "step": 16800 }, { "epoch": 15.52, "learning_rate": 0.00024952428945777057, "loss": 0.2007, "step": 17200 }, { "epoch": 15.52, "eval_loss": 0.1601659506559372, "eval_runtime": 202.199, "eval_samples_per_second": 19.57, "eval_steps_per_second": 2.448, "eval_wer": 0.20924055499248592, "step": 17200 }, { "epoch": 15.88, "learning_rate": 0.0002483178794597863, "loss": 0.2042, "step": 17600 }, { "epoch": 15.88, "eval_loss": 0.15520912408828735, "eval_runtime": 202.5397, "eval_samples_per_second": 19.537, "eval_steps_per_second": 2.444, "eval_wer": 0.1952958445461853, "step": 17600 }, { "epoch": 16.25, "learning_rate": 0.0002471084458778472, "loss": 0.187, "step": 18000 }, { "epoch": 16.25, "eval_loss": 0.16360661387443542, "eval_runtime": 202.7459, "eval_samples_per_second": 19.517, "eval_steps_per_second": 2.441, "eval_wer": 0.19442211582147975, "step": 18000 }, { "epoch": 16.61, "learning_rate": 0.00024589901229590805, "loss": 0.1931, "step": 18400 }, { "epoch": 16.61, "eval_loss": 0.1533084213733673, "eval_runtime": 203.0495, "eval_samples_per_second": 19.488, "eval_steps_per_second": 2.438, "eval_wer": 0.1916960822003984, "step": 18400 }, { "epoch": 16.97, "learning_rate": 0.000244689578713969, "loss": 0.1891, "step": 18800 }, { "epoch": 16.97, "eval_loss": 0.16208086907863617, "eval_runtime": 202.8946, "eval_samples_per_second": 19.503, "eval_steps_per_second": 2.44, "eval_wer": 0.1907175060287282, "step": 18800 }, { "epoch": 17.33, "learning_rate": 0.00024348014513202981, "loss": 0.1863, "step": 19200 }, { "epoch": 17.33, "eval_loss": 0.15993733704090118, "eval_runtime": 202.8467, "eval_samples_per_second": 19.507, "eval_steps_per_second": 2.44, "eval_wer": 0.1905777094327753, "step": 19200 }, { "epoch": 17.69, "learning_rate": 0.00024227071155009068, "loss": 0.1863, "step": 19600 }, { "epoch": 17.69, "eval_loss": 0.15354220569133759, "eval_runtime": 202.9873, "eval_samples_per_second": 19.494, "eval_steps_per_second": 2.439, "eval_wer": 0.1911718449655751, "step": 19600 }, { "epoch": 18.05, "learning_rate": 0.00024106127796815155, "loss": 0.1827, "step": 20000 }, { "epoch": 18.05, "eval_loss": 0.1616511195898056, "eval_runtime": 204.4003, "eval_samples_per_second": 19.359, "eval_steps_per_second": 2.422, "eval_wer": 0.1880264215566351, "step": 20000 }, { "epoch": 18.41, "learning_rate": 0.00023985184438621244, "loss": 0.1822, "step": 20400 }, { "epoch": 18.41, "eval_loss": 0.1649104207754135, "eval_runtime": 206.8059, "eval_samples_per_second": 19.134, "eval_steps_per_second": 2.394, "eval_wer": 0.18806137070562332, "step": 20400 }, { "epoch": 18.77, "learning_rate": 0.0002386424108042733, "loss": 0.1841, "step": 20800 }, { "epoch": 18.77, "eval_loss": 0.16039612889289856, "eval_runtime": 204.0742, "eval_samples_per_second": 19.39, "eval_steps_per_second": 2.426, "eval_wer": 0.18942438751616397, "step": 20800 }, { "epoch": 19.13, "learning_rate": 0.00023743297722233418, "loss": 0.1817, "step": 21200 }, { "epoch": 19.13, "eval_loss": 0.157293900847435, "eval_runtime": 202.3143, "eval_samples_per_second": 19.559, "eval_steps_per_second": 2.447, "eval_wer": 0.1810715409079789, "step": 21200 }, { "epoch": 19.49, "learning_rate": 0.00023622959080830475, "loss": 0.1756, "step": 21600 }, { "epoch": 19.49, "eval_loss": 0.16328036785125732, "eval_runtime": 204.2348, "eval_samples_per_second": 19.375, "eval_steps_per_second": 2.424, "eval_wer": 0.18809631985461153, "step": 21600 }, { "epoch": 19.86, "learning_rate": 0.00023502015722636564, "loss": 0.1804, "step": 22000 }, { "epoch": 19.86, "eval_loss": 0.1585751324892044, "eval_runtime": 204.9615, "eval_samples_per_second": 19.306, "eval_steps_per_second": 2.415, "eval_wer": 0.18823611645056443, "step": 22000 }, { "epoch": 20.22, "learning_rate": 0.0002338107236444265, "loss": 0.1736, "step": 22400 }, { "epoch": 20.22, "eval_loss": 0.15415678918361664, "eval_runtime": 202.4017, "eval_samples_per_second": 19.55, "eval_steps_per_second": 2.446, "eval_wer": 0.19033306538985775, "step": 22400 }, { "epoch": 20.58, "learning_rate": 0.00023260129006248738, "loss": 0.1704, "step": 22800 }, { "epoch": 20.58, "eval_loss": 0.1622617095708847, "eval_runtime": 206.2682, "eval_samples_per_second": 19.184, "eval_steps_per_second": 2.4, "eval_wer": 0.1983014713591724, "step": 22800 }, { "epoch": 20.94, "learning_rate": 0.00023139185648054828, "loss": 0.1741, "step": 23200 }, { "epoch": 20.94, "eval_loss": 0.15422095358371735, "eval_runtime": 208.5273, "eval_samples_per_second": 18.976, "eval_steps_per_second": 2.374, "eval_wer": 0.19568028518505573, "step": 23200 }, { "epoch": 21.3, "learning_rate": 0.00023018242289860912, "loss": 0.1642, "step": 23600 }, { "epoch": 21.3, "eval_loss": 0.155488982796669, "eval_runtime": 206.3858, "eval_samples_per_second": 19.173, "eval_steps_per_second": 2.398, "eval_wer": 0.19704330199559641, "step": 23600 }, { "epoch": 21.66, "learning_rate": 0.00022897601290062486, "loss": 0.1707, "step": 24000 }, { "epoch": 21.66, "eval_loss": 0.15275663137435913, "eval_runtime": 206.6413, "eval_samples_per_second": 19.149, "eval_steps_per_second": 2.395, "eval_wer": 0.18442665921084822, "step": 24000 }, { "epoch": 22.02, "learning_rate": 0.00022776657931868573, "loss": 0.1673, "step": 24400 }, { "epoch": 22.02, "eval_loss": 0.15888585150241852, "eval_runtime": 208.9307, "eval_samples_per_second": 18.939, "eval_steps_per_second": 2.369, "eval_wer": 0.1858246251703771, "step": 24400 }, { "epoch": 22.38, "learning_rate": 0.0002265571457367466, "loss": 0.1593, "step": 24800 }, { "epoch": 22.38, "eval_loss": 0.15319041907787323, "eval_runtime": 206.6808, "eval_samples_per_second": 19.145, "eval_steps_per_second": 2.395, "eval_wer": 0.17750672771118023, "step": 24800 }, { "epoch": 22.74, "learning_rate": 0.0002253477121548075, "loss": 0.1595, "step": 25200 }, { "epoch": 22.74, "eval_loss": 0.15495522320270538, "eval_runtime": 211.5152, "eval_samples_per_second": 18.708, "eval_steps_per_second": 2.34, "eval_wer": 0.1755495753678398, "step": 25200 }, { "epoch": 23.1, "learning_rate": 0.00022413827857286836, "loss": 0.1598, "step": 25600 }, { "epoch": 23.1, "eval_loss": 0.16047397255897522, "eval_runtime": 213.237, "eval_samples_per_second": 18.557, "eval_steps_per_second": 2.321, "eval_wer": 0.18624401495823575, "step": 25600 }, { "epoch": 23.47, "learning_rate": 0.00022293186857488405, "loss": 0.1573, "step": 26000 }, { "epoch": 23.47, "eval_loss": 0.15952068567276, "eval_runtime": 205.0554, "eval_samples_per_second": 19.297, "eval_steps_per_second": 2.414, "eval_wer": 0.17796106664802713, "step": 26000 }, { "epoch": 23.83, "learning_rate": 0.00022172243499294495, "loss": 0.1596, "step": 26400 }, { "epoch": 23.83, "eval_loss": 0.15038904547691345, "eval_runtime": 203.7621, "eval_samples_per_second": 19.42, "eval_steps_per_second": 2.429, "eval_wer": 0.17879984622374445, "step": 26400 }, { "epoch": 24.19, "learning_rate": 0.00022051300141100584, "loss": 0.1589, "step": 26800 }, { "epoch": 24.19, "eval_loss": 0.15989159047603607, "eval_runtime": 202.8293, "eval_samples_per_second": 19.509, "eval_steps_per_second": 2.44, "eval_wer": 0.17677279558242756, "step": 26800 }, { "epoch": 24.55, "learning_rate": 0.00021930356782906668, "loss": 0.1551, "step": 27200 }, { "epoch": 24.55, "eval_loss": 0.15309767425060272, "eval_runtime": 203.5071, "eval_samples_per_second": 19.444, "eval_steps_per_second": 2.432, "eval_wer": 0.1685946947191836, "step": 27200 }, { "epoch": 24.91, "learning_rate": 0.00021809413424712758, "loss": 0.1593, "step": 27600 }, { "epoch": 24.91, "eval_loss": 0.1611892282962799, "eval_runtime": 203.404, "eval_samples_per_second": 19.454, "eval_steps_per_second": 2.434, "eval_wer": 0.181560828993814, "step": 27600 }, { "epoch": 25.27, "learning_rate": 0.00021688470066518845, "loss": 0.1519, "step": 28000 }, { "epoch": 25.27, "eval_loss": 0.16373491287231445, "eval_runtime": 203.251, "eval_samples_per_second": 19.469, "eval_steps_per_second": 2.435, "eval_wer": 0.17638835494355712, "step": 28000 }, { "epoch": 25.63, "learning_rate": 0.0002156752670832493, "loss": 0.1509, "step": 28400 }, { "epoch": 25.63, "eval_loss": 0.15399527549743652, "eval_runtime": 203.0998, "eval_samples_per_second": 19.483, "eval_steps_per_second": 2.437, "eval_wer": 0.17093628770139446, "step": 28400 }, { "epoch": 25.99, "learning_rate": 0.0002144658335013102, "loss": 0.1524, "step": 28800 }, { "epoch": 25.99, "eval_loss": 0.1519121676683426, "eval_runtime": 203.5041, "eval_samples_per_second": 19.444, "eval_steps_per_second": 2.432, "eval_wer": 0.18072204941809666, "step": 28800 }, { "epoch": 26.35, "learning_rate": 0.00021325639991937108, "loss": 0.1452, "step": 29200 }, { "epoch": 26.35, "eval_loss": 0.15314562618732452, "eval_runtime": 203.4098, "eval_samples_per_second": 19.453, "eval_steps_per_second": 2.434, "eval_wer": 0.17310313493866425, "step": 29200 }, { "epoch": 26.71, "learning_rate": 0.0002120499899213868, "loss": 0.1485, "step": 29600 }, { "epoch": 26.71, "eval_loss": 0.14827923476696014, "eval_runtime": 203.2718, "eval_samples_per_second": 19.467, "eval_steps_per_second": 2.435, "eval_wer": 0.16136022087862162, "step": 29600 }, { "epoch": 27.08, "learning_rate": 0.0002108405563394477, "loss": 0.1513, "step": 30000 }, { "epoch": 27.08, "eval_loss": 0.15502476692199707, "eval_runtime": 203.5744, "eval_samples_per_second": 19.438, "eval_steps_per_second": 2.432, "eval_wer": 0.16579876280012582, "step": 30000 }, { "epoch": 27.44, "learning_rate": 0.00020963112275750853, "loss": 0.1437, "step": 30400 }, { "epoch": 27.44, "eval_loss": 0.15631796419620514, "eval_runtime": 204.0627, "eval_samples_per_second": 19.391, "eval_steps_per_second": 2.426, "eval_wer": 0.16614825429000804, "step": 30400 }, { "epoch": 27.8, "learning_rate": 0.00020842168917556943, "loss": 0.1445, "step": 30800 }, { "epoch": 27.8, "eval_loss": 0.15840862691402435, "eval_runtime": 204.5713, "eval_samples_per_second": 19.343, "eval_steps_per_second": 2.42, "eval_wer": 0.17086638940341803, "step": 30800 }, { "epoch": 28.16, "learning_rate": 0.0002072122555936303, "loss": 0.1438, "step": 31200 }, { "epoch": 28.16, "eval_loss": 0.15680429339408875, "eval_runtime": 204.0022, "eval_samples_per_second": 19.397, "eval_steps_per_second": 2.426, "eval_wer": 0.1630727291790445, "step": 31200 }, { "epoch": 28.52, "learning_rate": 0.00020600282201169116, "loss": 0.1423, "step": 31600 }, { "epoch": 28.52, "eval_loss": 0.15523846447467804, "eval_runtime": 203.5692, "eval_samples_per_second": 19.438, "eval_steps_per_second": 2.432, "eval_wer": 0.16090588194177471, "step": 31600 }, { "epoch": 28.88, "learning_rate": 0.00020479338842975206, "loss": 0.1451, "step": 32000 }, { "epoch": 28.88, "eval_loss": 0.1542510688304901, "eval_runtime": 203.7413, "eval_samples_per_second": 19.422, "eval_steps_per_second": 2.43, "eval_wer": 0.16314262747702094, "step": 32000 }, { "epoch": 29.24, "learning_rate": 0.00020358395484781292, "loss": 0.1396, "step": 32400 }, { "epoch": 29.24, "eval_loss": 0.15091465413570404, "eval_runtime": 203.6781, "eval_samples_per_second": 19.428, "eval_steps_per_second": 2.43, "eval_wer": 0.16317757662600915, "step": 32400 }, { "epoch": 29.6, "learning_rate": 0.0002023745212658738, "loss": 0.142, "step": 32800 }, { "epoch": 29.6, "eval_loss": 0.15581683814525604, "eval_runtime": 203.4725, "eval_samples_per_second": 19.447, "eval_steps_per_second": 2.433, "eval_wer": 0.1635620172648796, "step": 32800 }, { "epoch": 29.96, "learning_rate": 0.0002011650876839347, "loss": 0.1385, "step": 33200 }, { "epoch": 29.96, "eval_loss": 0.145892933011055, "eval_runtime": 203.8769, "eval_samples_per_second": 19.409, "eval_steps_per_second": 2.428, "eval_wer": 0.17338272813057, "step": 33200 }, { "epoch": 30.32, "learning_rate": 0.00019995565410199556, "loss": 0.1376, "step": 33600 }, { "epoch": 30.32, "eval_loss": 0.15049244463443756, "eval_runtime": 203.9232, "eval_samples_per_second": 19.404, "eval_steps_per_second": 2.427, "eval_wer": 0.17236920280991158, "step": 33600 }, { "epoch": 30.69, "learning_rate": 0.00019874924410401127, "loss": 0.1399, "step": 34000 }, { "epoch": 30.69, "eval_loss": 0.14067970216274261, "eval_runtime": 203.8686, "eval_samples_per_second": 19.41, "eval_steps_per_second": 2.428, "eval_wer": 0.1610107293887394, "step": 34000 }, { "epoch": 31.05, "learning_rate": 0.00019753981052207217, "loss": 0.1377, "step": 34400 }, { "epoch": 31.05, "eval_loss": 0.1523168534040451, "eval_runtime": 203.6263, "eval_samples_per_second": 19.433, "eval_steps_per_second": 2.431, "eval_wer": 0.17292838919372314, "step": 34400 }, { "epoch": 31.41, "learning_rate": 0.000196330376940133, "loss": 0.1331, "step": 34800 }, { "epoch": 31.41, "eval_loss": 0.14309507608413696, "eval_runtime": 204.8423, "eval_samples_per_second": 19.317, "eval_steps_per_second": 2.416, "eval_wer": 0.1706566945094887, "step": 34800 }, { "epoch": 31.77, "learning_rate": 0.0001951209433581939, "loss": 0.1356, "step": 35200 }, { "epoch": 31.77, "eval_loss": 0.1558520793914795, "eval_runtime": 204.495, "eval_samples_per_second": 19.35, "eval_steps_per_second": 2.421, "eval_wer": 0.16391150875476182, "step": 35200 }, { "epoch": 32.13, "learning_rate": 0.00019391150977625477, "loss": 0.1381, "step": 35600 }, { "epoch": 32.13, "eval_loss": 0.14920052886009216, "eval_runtime": 204.3124, "eval_samples_per_second": 19.367, "eval_steps_per_second": 2.423, "eval_wer": 0.1660783559920316, "step": 35600 }, { "epoch": 32.49, "learning_rate": 0.00019270207619431564, "loss": 0.1332, "step": 36000 }, { "epoch": 32.49, "eval_loss": 0.15177668631076813, "eval_runtime": 204.8291, "eval_samples_per_second": 19.319, "eval_steps_per_second": 2.417, "eval_wer": 0.16139517002760984, "step": 36000 }, { "epoch": 32.85, "learning_rate": 0.00019149264261237654, "loss": 0.1345, "step": 36400 }, { "epoch": 32.85, "eval_loss": 0.14764094352722168, "eval_runtime": 204.5031, "eval_samples_per_second": 19.349, "eval_steps_per_second": 2.421, "eval_wer": 0.1562925942753294, "step": 36400 }, { "epoch": 33.21, "learning_rate": 0.00019028320903043738, "loss": 0.1307, "step": 36800 }, { "epoch": 33.21, "eval_loss": 0.1433647871017456, "eval_runtime": 205.0597, "eval_samples_per_second": 19.297, "eval_steps_per_second": 2.414, "eval_wer": 0.15922832279034005, "step": 36800 }, { "epoch": 33.57, "learning_rate": 0.00018907679903245312, "loss": 0.1309, "step": 37200 }, { "epoch": 33.57, "eval_loss": 0.1497233361005783, "eval_runtime": 204.4523, "eval_samples_per_second": 19.354, "eval_steps_per_second": 2.421, "eval_wer": 0.17090133855240625, "step": 37200 }, { "epoch": 33.93, "learning_rate": 0.00018786736545051402, "loss": 0.1319, "step": 37600 }, { "epoch": 33.93, "eval_loss": 0.15207722783088684, "eval_runtime": 204.6556, "eval_samples_per_second": 19.335, "eval_steps_per_second": 2.419, "eval_wer": 0.1708314402544298, "step": 37600 }, { "epoch": 34.3, "learning_rate": 0.00018665793186857486, "loss": 0.1279, "step": 38000 }, { "epoch": 34.3, "eval_loss": 0.14533580839633942, "eval_runtime": 205.2587, "eval_samples_per_second": 19.278, "eval_steps_per_second": 2.412, "eval_wer": 0.1569566281061056, "step": 38000 }, { "epoch": 34.66, "learning_rate": 0.00018544849828663575, "loss": 0.1268, "step": 38400 }, { "epoch": 34.66, "eval_loss": 0.14409175515174866, "eval_runtime": 204.8962, "eval_samples_per_second": 19.312, "eval_steps_per_second": 2.416, "eval_wer": 0.1608709327927865, "step": 38400 }, { "epoch": 35.02, "learning_rate": 0.0001842390647046966, "loss": 0.1301, "step": 38800 }, { "epoch": 35.02, "eval_loss": 0.1421143114566803, "eval_runtime": 205.2147, "eval_samples_per_second": 19.282, "eval_steps_per_second": 2.412, "eval_wer": 0.1567818823611645, "step": 38800 }, { "epoch": 35.38, "learning_rate": 0.0001830296311227575, "loss": 0.1248, "step": 39200 }, { "epoch": 35.38, "eval_loss": 0.14348700642585754, "eval_runtime": 209.2746, "eval_samples_per_second": 18.908, "eval_steps_per_second": 2.365, "eval_wer": 0.15727117044699962, "step": 39200 }, { "epoch": 35.74, "learning_rate": 0.00018182322112477323, "loss": 0.1256, "step": 39600 }, { "epoch": 35.74, "eval_loss": 0.14100748300552368, "eval_runtime": 214.9156, "eval_samples_per_second": 18.412, "eval_steps_per_second": 2.303, "eval_wer": 0.1528326285254954, "step": 39600 } ], "max_steps": 99720, "num_train_epochs": 90, "total_flos": 1.728945548938538e+20, "trial_name": null, "trial_params": null }