{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.512554112554113, "global_step": 8500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 8.333333333333334e-06, "loss": 8.4908, "step": 50 }, { "epoch": 0.35, "learning_rate": 1.6666666666666667e-05, "loss": 8.1905, "step": 100 }, { "epoch": 0.35, "eval_loss": 771.380126953125, "eval_runtime": 237.9275, "eval_samples_per_second": 7.061, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.52, "learning_rate": 2.5e-05, "loss": 6.0833, "step": 150 }, { "epoch": 0.69, "learning_rate": 3.3333333333333335e-05, "loss": 4.2899, "step": 200 }, { "epoch": 0.69, "eval_loss": 315.0731506347656, "eval_runtime": 181.0423, "eval_samples_per_second": 9.28, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.87, "learning_rate": 4.1666666666666665e-05, "loss": 3.5966, "step": 250 }, { "epoch": 1.04, "learning_rate": 5e-05, "loss": 3.2533, "step": 300 }, { "epoch": 1.04, "eval_loss": 284.96343994140625, "eval_runtime": 179.8422, "eval_samples_per_second": 9.342, "eval_wer": 0.9928532160527762, "step": 300 }, { "epoch": 1.21, "learning_rate": 5.833333333333333e-05, "loss": 3.0363, "step": 350 }, { "epoch": 1.39, "learning_rate": 6.666666666666667e-05, "loss": 3.0043, "step": 400 }, { "epoch": 1.39, "eval_loss": 280.4071044921875, "eval_runtime": 180.8047, "eval_samples_per_second": 9.292, "eval_wer": 1.0, "step": 400 }, { "epoch": 1.56, "learning_rate": 7.5e-05, "loss": 2.9767, "step": 450 }, { "epoch": 1.73, "learning_rate": 8.333333333333333e-05, "loss": 2.9848, "step": 500 }, { "epoch": 1.73, "eval_loss": 279.59625244140625, "eval_runtime": 179.9553, "eval_samples_per_second": 9.336, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.91, "learning_rate": 9.166666666666667e-05, "loss": 2.967, "step": 550 }, { "epoch": 2.08, "learning_rate": 0.0001, "loss": 3.0088, "step": 600 }, { "epoch": 2.08, "eval_loss": 276.5469665527344, "eval_runtime": 181.9549, "eval_samples_per_second": 9.233, "eval_wer": 1.0, "step": 600 }, { "epoch": 2.26, "learning_rate": 0.00010833333333333334, "loss": 2.945, "step": 650 }, { "epoch": 2.43, "learning_rate": 0.00011666666666666667, "loss": 2.9085, "step": 700 }, { "epoch": 2.43, "eval_loss": 261.8099365234375, "eval_runtime": 182.2193, "eval_samples_per_second": 9.22, "eval_wer": 1.0, "step": 700 }, { "epoch": 2.6, "learning_rate": 0.000125, "loss": 2.6998, "step": 750 }, { "epoch": 2.78, "learning_rate": 0.00013333333333333334, "loss": 2.0025, "step": 800 }, { "epoch": 2.78, "eval_loss": 113.09307098388672, "eval_runtime": 182.7306, "eval_samples_per_second": 9.194, "eval_wer": 0.919392523364486, "step": 800 }, { "epoch": 2.95, "learning_rate": 0.00014166666666666668, "loss": 1.3056, "step": 850 }, { "epoch": 3.12, "learning_rate": 0.00015, "loss": 0.9594, "step": 900 }, { "epoch": 3.12, "eval_loss": 48.241703033447266, "eval_runtime": 182.3931, "eval_samples_per_second": 9.211, "eval_wer": 0.44193238042880706, "step": 900 }, { "epoch": 3.3, "learning_rate": 0.00015833333333333332, "loss": 0.7557, "step": 950 }, { "epoch": 3.47, "learning_rate": 0.00016666666666666666, "loss": 0.6721, "step": 1000 }, { "epoch": 3.47, "eval_loss": 36.373294830322266, "eval_runtime": 182.5274, "eval_samples_per_second": 9.204, "eval_wer": 0.3460005497526113, "step": 1000 }, { "epoch": 3.64, "learning_rate": 0.000175, "loss": 0.5788, "step": 1050 }, { "epoch": 3.82, "learning_rate": 0.00018333333333333334, "loss": 0.5235, "step": 1100 }, { "epoch": 3.82, "eval_loss": 29.015974044799805, "eval_runtime": 182.315, "eval_samples_per_second": 9.215, "eval_wer": 0.2875893347993403, "step": 1100 }, { "epoch": 3.99, "learning_rate": 0.00019166666666666667, "loss": 0.5232, "step": 1150 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 0.4547, "step": 1200 }, { "epoch": 4.17, "eval_loss": 26.849903106689453, "eval_runtime": 181.8627, "eval_samples_per_second": 9.238, "eval_wer": 0.26580538757559097, "step": 1200 }, { "epoch": 4.34, "learning_rate": 0.00020833333333333335, "loss": 0.4091, "step": 1250 }, { "epoch": 4.51, "learning_rate": 0.00021666666666666668, "loss": 0.4003, "step": 1300 }, { "epoch": 4.51, "eval_loss": 24.326810836791992, "eval_runtime": 180.904, "eval_samples_per_second": 9.287, "eval_wer": 0.23735568993952721, "step": 1300 }, { "epoch": 4.69, "learning_rate": 0.00022500000000000002, "loss": 0.3822, "step": 1350 }, { "epoch": 4.86, "learning_rate": 0.00023333333333333333, "loss": 0.3476, "step": 1400 }, { "epoch": 4.86, "eval_loss": 22.954877853393555, "eval_runtime": 182.9264, "eval_samples_per_second": 9.184, "eval_wer": 0.22161902144035184, "step": 1400 }, { "epoch": 5.03, "learning_rate": 0.00024166666666666667, "loss": 0.3594, "step": 1450 }, { "epoch": 5.21, "learning_rate": 0.00025, "loss": 0.3125, "step": 1500 }, { "epoch": 5.21, "eval_loss": 22.298479080200195, "eval_runtime": 181.7168, "eval_samples_per_second": 9.245, "eval_wer": 0.20794392523364486, "step": 1500 }, { "epoch": 5.38, "learning_rate": 0.00025833333333333334, "loss": 0.2878, "step": 1550 }, { "epoch": 5.55, "learning_rate": 0.0002666666666666667, "loss": 0.2762, "step": 1600 }, { "epoch": 5.55, "eval_loss": 23.098875045776367, "eval_runtime": 183.2329, "eval_samples_per_second": 9.169, "eval_wer": 0.21247938427707533, "step": 1600 }, { "epoch": 5.73, "learning_rate": 0.000275, "loss": 0.2703, "step": 1650 }, { "epoch": 5.9, "learning_rate": 0.00028333333333333335, "loss": 0.2827, "step": 1700 }, { "epoch": 5.9, "eval_loss": 21.197721481323242, "eval_runtime": 182.7196, "eval_samples_per_second": 9.194, "eval_wer": 0.19529961517317207, "step": 1700 }, { "epoch": 6.08, "learning_rate": 0.0002916666666666667, "loss": 0.2624, "step": 1750 }, { "epoch": 6.25, "learning_rate": 0.0003, "loss": 0.2306, "step": 1800 }, { "epoch": 6.25, "eval_loss": 22.732194900512695, "eval_runtime": 181.3273, "eval_samples_per_second": 9.265, "eval_wer": 0.18973336998350743, "step": 1800 }, { "epoch": 6.42, "learning_rate": 0.00030833333333333337, "loss": 0.2384, "step": 1850 }, { "epoch": 6.6, "learning_rate": 0.00031666666666666665, "loss": 0.2362, "step": 1900 }, { "epoch": 6.6, "eval_loss": 22.076433181762695, "eval_runtime": 181.988, "eval_samples_per_second": 9.231, "eval_wer": 0.18629741616272677, "step": 1900 }, { "epoch": 6.77, "learning_rate": 0.00032500000000000004, "loss": 0.2269, "step": 1950 }, { "epoch": 6.94, "learning_rate": 0.0003333333333333333, "loss": 0.2342, "step": 2000 }, { "epoch": 6.94, "eval_loss": 22.727157592773438, "eval_runtime": 182.2314, "eval_samples_per_second": 9.219, "eval_wer": 0.17983782297965914, "step": 2000 }, { "epoch": 7.12, "learning_rate": 0.00034166666666666666, "loss": 0.2271, "step": 2050 }, { "epoch": 7.29, "learning_rate": 0.00035, "loss": 0.2165, "step": 2100 }, { "epoch": 7.29, "eval_loss": 21.797693252563477, "eval_runtime": 182.6296, "eval_samples_per_second": 9.199, "eval_wer": 0.17468389224848818, "step": 2100 }, { "epoch": 7.46, "learning_rate": 0.00035833333333333333, "loss": 0.2037, "step": 2150 }, { "epoch": 7.64, "learning_rate": 0.00036666666666666667, "loss": 0.2134, "step": 2200 }, { "epoch": 7.64, "eval_loss": 22.025192260742188, "eval_runtime": 182.2586, "eval_samples_per_second": 9.218, "eval_wer": 0.18148708081363388, "step": 2200 }, { "epoch": 7.81, "learning_rate": 0.000375, "loss": 0.2096, "step": 2250 }, { "epoch": 7.98, "learning_rate": 0.00038333333333333334, "loss": 0.2033, "step": 2300 }, { "epoch": 7.98, "eval_loss": 21.954696655273438, "eval_runtime": 182.9541, "eval_samples_per_second": 9.183, "eval_wer": 0.17990654205607476, "step": 2300 }, { "epoch": 8.16, "learning_rate": 0.0003916666666666667, "loss": 0.2029, "step": 2350 }, { "epoch": 8.33, "learning_rate": 0.0004, "loss": 0.1948, "step": 2400 }, { "epoch": 8.33, "eval_loss": 20.202104568481445, "eval_runtime": 181.6845, "eval_samples_per_second": 9.247, "eval_wer": 0.17303463441451347, "step": 2400 }, { "epoch": 8.51, "learning_rate": 0.00040833333333333336, "loss": 0.1828, "step": 2450 }, { "epoch": 8.68, "learning_rate": 0.0004166666666666667, "loss": 0.1876, "step": 2500 }, { "epoch": 8.68, "eval_loss": 23.213821411132812, "eval_runtime": 182.8529, "eval_samples_per_second": 9.188, "eval_wer": 0.1705607476635514, "step": 2500 }, { "epoch": 8.85, "learning_rate": 0.000425, "loss": 0.1721, "step": 2550 }, { "epoch": 9.03, "learning_rate": 0.00043333333333333337, "loss": 0.1774, "step": 2600 }, { "epoch": 9.03, "eval_loss": 24.49812126159668, "eval_runtime": 181.9478, "eval_samples_per_second": 9.233, "eval_wer": 0.174890049477735, "step": 2600 }, { "epoch": 9.2, "learning_rate": 0.00044166666666666665, "loss": 0.1613, "step": 2650 }, { "epoch": 9.37, "learning_rate": 0.00045000000000000004, "loss": 0.183, "step": 2700 }, { "epoch": 9.37, "eval_loss": 25.343666076660156, "eval_runtime": 181.6008, "eval_samples_per_second": 9.251, "eval_wer": 0.1805937328202309, "step": 2700 }, { "epoch": 9.55, "learning_rate": 0.0004583333333333333, "loss": 0.1738, "step": 2750 }, { "epoch": 9.72, "learning_rate": 0.00046666666666666666, "loss": 0.1886, "step": 2800 }, { "epoch": 9.72, "eval_loss": 24.328418731689453, "eval_runtime": 182.0168, "eval_samples_per_second": 9.23, "eval_wer": 0.18519791094007695, "step": 2800 }, { "epoch": 9.89, "learning_rate": 0.000475, "loss": 0.1855, "step": 2850 }, { "epoch": 10.07, "learning_rate": 0.00048333333333333334, "loss": 0.1784, "step": 2900 }, { "epoch": 10.07, "eval_loss": 24.794740676879883, "eval_runtime": 182.2346, "eval_samples_per_second": 9.219, "eval_wer": 0.1759208356239692, "step": 2900 }, { "epoch": 10.24, "learning_rate": 0.0004916666666666666, "loss": 0.1639, "step": 2950 }, { "epoch": 10.42, "learning_rate": 0.0005, "loss": 0.1951, "step": 3000 }, { "epoch": 10.42, "eval_loss": 23.346879959106445, "eval_runtime": 183.0341, "eval_samples_per_second": 9.179, "eval_wer": 0.1854040681693238, "step": 3000 }, { "epoch": 10.59, "learning_rate": 0.0004955673758865248, "loss": 0.1841, "step": 3050 }, { "epoch": 10.76, "learning_rate": 0.0004911347517730497, "loss": 0.1638, "step": 3100 }, { "epoch": 10.76, "eval_loss": 26.021047592163086, "eval_runtime": 182.7843, "eval_samples_per_second": 9.191, "eval_wer": 0.18945849367784498, "step": 3100 }, { "epoch": 10.94, "learning_rate": 0.0004867021276595745, "loss": 0.1699, "step": 3150 }, { "epoch": 11.11, "learning_rate": 0.00048226950354609925, "loss": 0.178, "step": 3200 }, { "epoch": 11.11, "eval_loss": 25.51027488708496, "eval_runtime": 182.9857, "eval_samples_per_second": 9.181, "eval_wer": 0.18499175371083013, "step": 3200 }, { "epoch": 11.28, "learning_rate": 0.0004778368794326241, "loss": 0.1443, "step": 3250 }, { "epoch": 11.46, "learning_rate": 0.00047340425531914893, "loss": 0.1448, "step": 3300 }, { "epoch": 11.46, "eval_loss": 23.841064453125, "eval_runtime": 182.7518, "eval_samples_per_second": 9.193, "eval_wer": 0.17702034084661902, "step": 3300 }, { "epoch": 11.63, "learning_rate": 0.00046897163120567377, "loss": 0.1415, "step": 3350 }, { "epoch": 11.8, "learning_rate": 0.0004645390070921986, "loss": 0.1451, "step": 3400 }, { "epoch": 11.8, "eval_loss": 24.386722564697266, "eval_runtime": 182.0009, "eval_samples_per_second": 9.231, "eval_wer": 0.17440901594282573, "step": 3400 }, { "epoch": 11.98, "learning_rate": 0.00046010638297872344, "loss": 0.1613, "step": 3450 }, { "epoch": 12.15, "learning_rate": 0.0004556737588652483, "loss": 0.1479, "step": 3500 }, { "epoch": 12.15, "eval_loss": 25.48659324645996, "eval_runtime": 183.184, "eval_samples_per_second": 9.171, "eval_wer": 0.18196811434854315, "step": 3500 }, { "epoch": 12.33, "learning_rate": 0.00045124113475177307, "loss": 0.1249, "step": 3550 }, { "epoch": 12.5, "learning_rate": 0.00044680851063829785, "loss": 0.1255, "step": 3600 }, { "epoch": 12.5, "eval_loss": 26.741592407226562, "eval_runtime": 182.0975, "eval_samples_per_second": 9.226, "eval_wer": 0.17447773501924135, "step": 3600 }, { "epoch": 12.67, "learning_rate": 0.0004423758865248227, "loss": 0.1236, "step": 3650 }, { "epoch": 12.85, "learning_rate": 0.0004379432624113475, "loss": 0.1307, "step": 3700 }, { "epoch": 12.85, "eval_loss": 25.000507354736328, "eval_runtime": 182.3463, "eval_samples_per_second": 9.213, "eval_wer": 0.1726910390324354, "step": 3700 }, { "epoch": 13.02, "learning_rate": 0.00043351063829787236, "loss": 0.1504, "step": 3750 }, { "epoch": 13.19, "learning_rate": 0.0004290780141843972, "loss": 0.1185, "step": 3800 }, { "epoch": 13.19, "eval_loss": 24.76068115234375, "eval_runtime": 182.3064, "eval_samples_per_second": 9.215, "eval_wer": 0.1630703683342496, "step": 3800 }, { "epoch": 13.37, "learning_rate": 0.000424645390070922, "loss": 0.1151, "step": 3850 }, { "epoch": 13.54, "learning_rate": 0.0004202127659574468, "loss": 0.1296, "step": 3900 }, { "epoch": 13.54, "eval_loss": 24.10735511779785, "eval_runtime": 183.409, "eval_samples_per_second": 9.16, "eval_wer": 0.16197086311159978, "step": 3900 }, { "epoch": 13.71, "learning_rate": 0.0004157801418439716, "loss": 0.1099, "step": 3950 }, { "epoch": 13.89, "learning_rate": 0.00041134751773049644, "loss": 0.1141, "step": 4000 }, { "epoch": 13.89, "eval_loss": 27.41958236694336, "eval_runtime": 182.5069, "eval_samples_per_second": 9.205, "eval_wer": 0.17647058823529413, "step": 4000 }, { "epoch": 14.06, "learning_rate": 0.0004069148936170213, "loss": 0.1016, "step": 4050 }, { "epoch": 14.24, "learning_rate": 0.0004024822695035461, "loss": 0.1112, "step": 4100 }, { "epoch": 14.24, "eval_loss": 26.094676971435547, "eval_runtime": 182.6788, "eval_samples_per_second": 9.196, "eval_wer": 0.16760582737768004, "step": 4100 }, { "epoch": 14.41, "learning_rate": 0.00039804964539007096, "loss": 0.0982, "step": 4150 }, { "epoch": 14.58, "learning_rate": 0.00039361702127659574, "loss": 0.1021, "step": 4200 }, { "epoch": 14.58, "eval_loss": 27.608373641967773, "eval_runtime": 182.4822, "eval_samples_per_second": 9.206, "eval_wer": 0.1689802089059923, "step": 4200 }, { "epoch": 14.75, "learning_rate": 0.0003891843971631206, "loss": 0.1064, "step": 4250 }, { "epoch": 14.93, "learning_rate": 0.00038475177304964536, "loss": 0.112, "step": 4300 }, { "epoch": 14.93, "eval_loss": 27.2677059173584, "eval_runtime": 182.8419, "eval_samples_per_second": 9.188, "eval_wer": 0.17550852116547552, "step": 4300 }, { "epoch": 15.1, "learning_rate": 0.0003803191489361702, "loss": 0.1037, "step": 4350 }, { "epoch": 15.28, "learning_rate": 0.00037588652482269504, "loss": 0.1002, "step": 4400 }, { "epoch": 15.28, "eval_loss": 25.244844436645508, "eval_runtime": 182.1487, "eval_samples_per_second": 9.223, "eval_wer": 0.17021715228147333, "step": 4400 }, { "epoch": 15.45, "learning_rate": 0.0003714539007092199, "loss": 0.0889, "step": 4450 }, { "epoch": 15.62, "learning_rate": 0.0003670212765957447, "loss": 0.0893, "step": 4500 }, { "epoch": 15.62, "eval_loss": 31.379478454589844, "eval_runtime": 181.9808, "eval_samples_per_second": 9.232, "eval_wer": 0.17145409565695438, "step": 4500 }, { "epoch": 15.8, "learning_rate": 0.0003625886524822695, "loss": 0.0925, "step": 4550 }, { "epoch": 15.97, "learning_rate": 0.00035815602836879434, "loss": 0.0911, "step": 4600 }, { "epoch": 15.97, "eval_loss": 29.340713500976562, "eval_runtime": 182.5822, "eval_samples_per_second": 9.201, "eval_wer": 0.1657504123144585, "step": 4600 }, { "epoch": 16.15, "learning_rate": 0.0003537234042553192, "loss": 0.0945, "step": 4650 }, { "epoch": 16.32, "learning_rate": 0.00034929078014184396, "loss": 0.0798, "step": 4700 }, { "epoch": 16.32, "eval_loss": 27.59283447265625, "eval_runtime": 183.4998, "eval_samples_per_second": 9.155, "eval_wer": 0.16877405167674547, "step": 4700 }, { "epoch": 16.49, "learning_rate": 0.0003448581560283688, "loss": 0.0868, "step": 4750 }, { "epoch": 16.66, "learning_rate": 0.00034042553191489364, "loss": 0.0893, "step": 4800 }, { "epoch": 16.66, "eval_loss": 26.155086517333984, "eval_runtime": 183.3578, "eval_samples_per_second": 9.162, "eval_wer": 0.16877405167674547, "step": 4800 }, { "epoch": 16.84, "learning_rate": 0.0003359929078014184, "loss": 0.0844, "step": 4850 }, { "epoch": 17.01, "learning_rate": 0.00033156028368794326, "loss": 0.088, "step": 4900 }, { "epoch": 17.01, "eval_loss": 27.282428741455078, "eval_runtime": 183.8154, "eval_samples_per_second": 9.14, "eval_wer": 0.16870533260032985, "step": 4900 }, { "epoch": 17.19, "learning_rate": 0.0003271276595744681, "loss": 0.0844, "step": 4950 }, { "epoch": 17.36, "learning_rate": 0.00032269503546099293, "loss": 0.0647, "step": 5000 }, { "epoch": 17.36, "eval_loss": 27.251529693603516, "eval_runtime": 183.0422, "eval_samples_per_second": 9.178, "eval_wer": 0.1663001649257834, "step": 5000 }, { "epoch": 17.53, "learning_rate": 0.0003182624113475177, "loss": 0.0811, "step": 5050 }, { "epoch": 17.71, "learning_rate": 0.00031382978723404256, "loss": 0.0727, "step": 5100 }, { "epoch": 17.71, "eval_loss": 24.84876823425293, "eval_runtime": 182.8979, "eval_samples_per_second": 9.185, "eval_wer": 0.165131940626718, "step": 5100 }, { "epoch": 17.88, "learning_rate": 0.0003093971631205674, "loss": 0.0744, "step": 5150 }, { "epoch": 18.06, "learning_rate": 0.0003049645390070922, "loss": 0.0801, "step": 5200 }, { "epoch": 18.06, "eval_loss": 26.26492691040039, "eval_runtime": 182.8513, "eval_samples_per_second": 9.188, "eval_wer": 0.1648570643210555, "step": 5200 }, { "epoch": 18.23, "learning_rate": 0.000300531914893617, "loss": 0.0669, "step": 5250 }, { "epoch": 18.4, "learning_rate": 0.00029609929078014185, "loss": 0.0613, "step": 5300 }, { "epoch": 18.4, "eval_loss": 25.977203369140625, "eval_runtime": 182.5431, "eval_samples_per_second": 9.203, "eval_wer": 0.16114623419461244, "step": 5300 }, { "epoch": 18.57, "learning_rate": 0.0002916666666666667, "loss": 0.0641, "step": 5350 }, { "epoch": 18.75, "learning_rate": 0.00028723404255319153, "loss": 0.0588, "step": 5400 }, { "epoch": 18.75, "eval_loss": 26.410762786865234, "eval_runtime": 182.6733, "eval_samples_per_second": 9.197, "eval_wer": 0.1667811984606927, "step": 5400 }, { "epoch": 18.92, "learning_rate": 0.0002828014184397163, "loss": 0.0667, "step": 5450 }, { "epoch": 19.1, "learning_rate": 0.00027836879432624115, "loss": 0.0768, "step": 5500 }, { "epoch": 19.1, "eval_loss": 26.26220703125, "eval_runtime": 182.9701, "eval_samples_per_second": 9.182, "eval_wer": 0.15874106652006598, "step": 5500 }, { "epoch": 19.27, "learning_rate": 0.00027393617021276593, "loss": 0.0746, "step": 5550 }, { "epoch": 19.44, "learning_rate": 0.00026950354609929077, "loss": 0.0616, "step": 5600 }, { "epoch": 19.44, "eval_loss": 23.946300506591797, "eval_runtime": 183.2503, "eval_samples_per_second": 9.168, "eval_wer": 0.15681693238042882, "step": 5600 }, { "epoch": 19.62, "learning_rate": 0.0002650709219858156, "loss": 0.061, "step": 5650 }, { "epoch": 19.79, "learning_rate": 0.00026063829787234045, "loss": 0.066, "step": 5700 }, { "epoch": 19.79, "eval_loss": 27.404529571533203, "eval_runtime": 183.1378, "eval_samples_per_second": 9.173, "eval_wer": 0.16004672897196262, "step": 5700 }, { "epoch": 19.96, "learning_rate": 0.0002562056737588653, "loss": 0.0606, "step": 5750 }, { "epoch": 20.14, "learning_rate": 0.00025177304964539007, "loss": 0.0499, "step": 5800 }, { "epoch": 20.14, "eval_loss": 26.04939079284668, "eval_runtime": 182.3343, "eval_samples_per_second": 9.214, "eval_wer": 0.15805387575590985, "step": 5800 }, { "epoch": 20.31, "learning_rate": 0.0002473404255319149, "loss": 0.0635, "step": 5850 }, { "epoch": 20.48, "learning_rate": 0.00024290780141843972, "loss": 0.0576, "step": 5900 }, { "epoch": 20.48, "eval_loss": 25.570714950561523, "eval_runtime": 182.4858, "eval_samples_per_second": 9.206, "eval_wer": 0.1499450247388675, "step": 5900 }, { "epoch": 20.66, "learning_rate": 0.00023847517730496453, "loss": 0.0535, "step": 5950 }, { "epoch": 20.83, "learning_rate": 0.00023404255319148937, "loss": 0.0538, "step": 6000 }, { "epoch": 20.83, "eval_loss": 26.44669532775879, "eval_runtime": 182.7081, "eval_samples_per_second": 9.195, "eval_wer": 0.15276250687190765, "step": 6000 }, { "epoch": 21.01, "learning_rate": 0.0002296099290780142, "loss": 0.06, "step": 6050 }, { "epoch": 21.18, "learning_rate": 0.00022517730496453902, "loss": 0.0547, "step": 6100 }, { "epoch": 21.18, "eval_loss": 26.02959442138672, "eval_runtime": 183.3837, "eval_samples_per_second": 9.161, "eval_wer": 0.15083837273227046, "step": 6100 }, { "epoch": 21.35, "learning_rate": 0.00022074468085106383, "loss": 0.0506, "step": 6150 }, { "epoch": 21.53, "learning_rate": 0.00021631205673758867, "loss": 0.0578, "step": 6200 }, { "epoch": 21.53, "eval_loss": 25.79215431213379, "eval_runtime": 182.8615, "eval_samples_per_second": 9.187, "eval_wer": 0.1515255634964266, "step": 6200 }, { "epoch": 21.7, "learning_rate": 0.00021187943262411348, "loss": 0.0504, "step": 6250 }, { "epoch": 21.87, "learning_rate": 0.0002074468085106383, "loss": 0.0425, "step": 6300 }, { "epoch": 21.87, "eval_loss": 26.764015197753906, "eval_runtime": 182.7224, "eval_samples_per_second": 9.194, "eval_wer": 0.15592358438702583, "step": 6300 }, { "epoch": 22.05, "learning_rate": 0.00020301418439716313, "loss": 0.0509, "step": 6350 }, { "epoch": 22.22, "learning_rate": 0.00019858156028368796, "loss": 0.0473, "step": 6400 }, { "epoch": 22.22, "eval_loss": 26.02473258972168, "eval_runtime": 183.8649, "eval_samples_per_second": 9.137, "eval_wer": 0.15028862012094557, "step": 6400 }, { "epoch": 22.39, "learning_rate": 0.00019414893617021275, "loss": 0.039, "step": 6450 }, { "epoch": 22.57, "learning_rate": 0.00018971631205673758, "loss": 0.0519, "step": 6500 }, { "epoch": 22.57, "eval_loss": 26.268949508666992, "eval_runtime": 183.1205, "eval_samples_per_second": 9.174, "eval_wer": 0.15166300164925783, "step": 6500 }, { "epoch": 22.74, "learning_rate": 0.00018528368794326242, "loss": 0.0533, "step": 6550 }, { "epoch": 22.91, "learning_rate": 0.00018085106382978726, "loss": 0.0412, "step": 6600 }, { "epoch": 22.91, "eval_loss": 26.845340728759766, "eval_runtime": 183.0902, "eval_samples_per_second": 9.176, "eval_wer": 0.15063221550302364, "step": 6600 }, { "epoch": 23.09, "learning_rate": 0.00017641843971631204, "loss": 0.0535, "step": 6650 }, { "epoch": 23.26, "learning_rate": 0.00017198581560283688, "loss": 0.0449, "step": 6700 }, { "epoch": 23.26, "eval_loss": 24.5848445892334, "eval_runtime": 183.5855, "eval_samples_per_second": 9.151, "eval_wer": 0.14898295766904893, "step": 6700 }, { "epoch": 23.44, "learning_rate": 0.00016755319148936172, "loss": 0.0366, "step": 6750 }, { "epoch": 23.61, "learning_rate": 0.00016312056737588653, "loss": 0.0419, "step": 6800 }, { "epoch": 23.61, "eval_loss": 26.725446701049805, "eval_runtime": 182.6943, "eval_samples_per_second": 9.196, "eval_wer": 0.15063221550302364, "step": 6800 }, { "epoch": 23.78, "learning_rate": 0.00015868794326241134, "loss": 0.0404, "step": 6850 }, { "epoch": 23.96, "learning_rate": 0.00015425531914893618, "loss": 0.0417, "step": 6900 }, { "epoch": 23.96, "eval_loss": 26.662160873413086, "eval_runtime": 182.9944, "eval_samples_per_second": 9.181, "eval_wer": 0.14444749862561848, "step": 6900 }, { "epoch": 24.13, "learning_rate": 0.000149822695035461, "loss": 0.0341, "step": 6950 }, { "epoch": 24.3, "learning_rate": 0.0001453900709219858, "loss": 0.0383, "step": 7000 }, { "epoch": 24.3, "eval_loss": 25.917531967163086, "eval_runtime": 182.9271, "eval_samples_per_second": 9.184, "eval_wer": 0.14699010445299615, "step": 7000 }, { "epoch": 24.48, "learning_rate": 0.00014095744680851064, "loss": 0.0361, "step": 7050 }, { "epoch": 24.65, "learning_rate": 0.00013652482269503548, "loss": 0.037, "step": 7100 }, { "epoch": 24.65, "eval_loss": 26.451175689697266, "eval_runtime": 182.8419, "eval_samples_per_second": 9.188, "eval_wer": 0.14314183617372184, "step": 7100 }, { "epoch": 24.82, "learning_rate": 0.0001320921985815603, "loss": 0.0358, "step": 7150 }, { "epoch": 25.0, "learning_rate": 0.0001276595744680851, "loss": 0.0421, "step": 7200 }, { "epoch": 25.0, "eval_loss": 24.637380599975586, "eval_runtime": 182.468, "eval_samples_per_second": 9.207, "eval_wer": 0.14307311709730622, "step": 7200 }, { "epoch": 25.17, "learning_rate": 0.00012322695035460994, "loss": 0.034, "step": 7250 }, { "epoch": 25.35, "learning_rate": 0.00011879432624113476, "loss": 0.0366, "step": 7300 }, { "epoch": 25.35, "eval_loss": 25.25677490234375, "eval_runtime": 183.0484, "eval_samples_per_second": 9.178, "eval_wer": 0.14369158878504673, "step": 7300 }, { "epoch": 25.52, "learning_rate": 0.00011436170212765957, "loss": 0.0358, "step": 7350 }, { "epoch": 25.69, "learning_rate": 0.00010992907801418441, "loss": 0.0411, "step": 7400 }, { "epoch": 25.69, "eval_loss": 23.710235595703125, "eval_runtime": 183.8494, "eval_samples_per_second": 9.138, "eval_wer": 0.1435541506322155, "step": 7400 }, { "epoch": 25.87, "learning_rate": 0.00010549645390070922, "loss": 0.0324, "step": 7450 }, { "epoch": 26.04, "learning_rate": 0.00010106382978723403, "loss": 0.0395, "step": 7500 }, { "epoch": 26.04, "eval_loss": 25.433012008666992, "eval_runtime": 183.0989, "eval_samples_per_second": 9.175, "eval_wer": 0.14108026388125344, "step": 7500 }, { "epoch": 26.21, "learning_rate": 9.663120567375887e-05, "loss": 0.0373, "step": 7550 }, { "epoch": 26.39, "learning_rate": 9.219858156028368e-05, "loss": 0.0378, "step": 7600 }, { "epoch": 26.39, "eval_loss": 24.688125610351562, "eval_runtime": 187.9819, "eval_samples_per_second": 8.937, "eval_wer": 0.13963716327652556, "step": 7600 }, { "epoch": 26.56, "learning_rate": 8.776595744680852e-05, "loss": 0.0305, "step": 7650 }, { "epoch": 26.73, "learning_rate": 8.333333333333333e-05, "loss": 0.0295, "step": 7700 }, { "epoch": 26.73, "eval_loss": 24.53731918334961, "eval_runtime": 184.8939, "eval_samples_per_second": 9.086, "eval_wer": 0.14114898295766906, "step": 7700 }, { "epoch": 26.91, "learning_rate": 7.890070921985815e-05, "loss": 0.0391, "step": 7750 }, { "epoch": 27.08, "learning_rate": 7.446808510638298e-05, "loss": 0.0295, "step": 7800 }, { "epoch": 27.08, "eval_loss": 24.03723907470703, "eval_runtime": 183.5479, "eval_samples_per_second": 9.153, "eval_wer": 0.14183617372182517, "step": 7800 }, { "epoch": 27.26, "learning_rate": 7.00354609929078e-05, "loss": 0.0367, "step": 7850 }, { "epoch": 27.43, "learning_rate": 6.560283687943263e-05, "loss": 0.0357, "step": 7900 }, { "epoch": 27.43, "eval_loss": 23.551311492919922, "eval_runtime": 186.1508, "eval_samples_per_second": 9.025, "eval_wer": 0.1392935678944475, "step": 7900 }, { "epoch": 27.6, "learning_rate": 6.117021276595745e-05, "loss": 0.0278, "step": 7950 }, { "epoch": 27.78, "learning_rate": 5.673758865248227e-05, "loss": 0.0292, "step": 8000 }, { "epoch": 27.78, "eval_loss": 26.10912322998047, "eval_runtime": 183.6318, "eval_samples_per_second": 9.149, "eval_wer": 0.1403243540406817, "step": 8000 }, { "epoch": 27.95, "learning_rate": 5.2304964539007095e-05, "loss": 0.0293, "step": 8050 }, { "epoch": 28.12, "learning_rate": 4.787234042553191e-05, "loss": 0.0247, "step": 8100 }, { "epoch": 28.12, "eval_loss": 25.034645080566406, "eval_runtime": 183.9776, "eval_samples_per_second": 9.132, "eval_wer": 0.13688840021990104, "step": 8100 }, { "epoch": 28.3, "learning_rate": 4.343971631205674e-05, "loss": 0.0334, "step": 8150 }, { "epoch": 28.47, "learning_rate": 3.900709219858156e-05, "loss": 0.0238, "step": 8200 }, { "epoch": 28.47, "eval_loss": 24.014360427856445, "eval_runtime": 183.9625, "eval_samples_per_second": 9.132, "eval_wer": 0.1355827377680044, "step": 8200 }, { "epoch": 28.64, "learning_rate": 3.4574468085106386e-05, "loss": 0.0342, "step": 8250 }, { "epoch": 28.82, "learning_rate": 3.0141843971631203e-05, "loss": 0.0278, "step": 8300 }, { "epoch": 28.82, "eval_loss": 24.382017135620117, "eval_runtime": 183.4893, "eval_samples_per_second": 9.156, "eval_wer": 0.13585761407366684, "step": 8300 }, { "epoch": 28.99, "learning_rate": 2.5709219858156028e-05, "loss": 0.0209, "step": 8350 }, { "epoch": 29.17, "learning_rate": 2.1276595744680852e-05, "loss": 0.0289, "step": 8400 }, { "epoch": 29.17, "eval_loss": 25.17869758605957, "eval_runtime": 184.0892, "eval_samples_per_second": 9.126, "eval_wer": 0.13551401869158877, "step": 8400 }, { "epoch": 29.34, "learning_rate": 1.6843971631205673e-05, "loss": 0.0306, "step": 8450 }, { "epoch": 29.51, "learning_rate": 1.2411347517730498e-05, "loss": 0.0238, "step": 8500 }, { "epoch": 29.51, "eval_loss": 25.424150466918945, "eval_runtime": 182.9499, "eval_samples_per_second": 9.183, "eval_wer": 0.1351704233095107, "step": 8500 } ], "max_steps": 8640, "num_train_epochs": 30, "total_flos": 1.2862009633623437e+19, "trial_name": null, "trial_params": null }