{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.999153259949196, "global_step": 17700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 1.8749999999999998e-06, "loss": 9.7932, "step": 50 }, { "epoch": 0.11, "learning_rate": 3.7125e-06, "loss": 9.6218, "step": 100 }, { "epoch": 0.17, "learning_rate": 5.549999999999999e-06, "loss": 7.6384, "step": 150 }, { "epoch": 0.23, "learning_rate": 7.425e-06, "loss": 5.5724, "step": 200 }, { "epoch": 0.28, "learning_rate": 9.299999999999999e-06, "loss": 4.4527, "step": 250 }, { "epoch": 0.28, "eval_loss": 4.014413833618164, "eval_runtime": 432.9366, "eval_samples_per_second": 14.339, "eval_steps_per_second": 0.896, "eval_wer": 1.0, "step": 250 }, { "epoch": 0.34, "learning_rate": 1.1174999999999999e-05, "loss": 3.996, "step": 300 }, { "epoch": 0.4, "learning_rate": 1.3049999999999999e-05, "loss": 3.6961, "step": 350 }, { "epoch": 0.45, "learning_rate": 1.4925e-05, "loss": 3.4442, "step": 400 }, { "epoch": 0.51, "learning_rate": 1.68e-05, "loss": 3.3442, "step": 450 }, { "epoch": 0.56, "learning_rate": 1.8675e-05, "loss": 3.1828, "step": 500 }, { "epoch": 0.56, "eval_loss": 3.136876106262207, "eval_runtime": 432.521, "eval_samples_per_second": 14.353, "eval_steps_per_second": 0.897, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.62, "learning_rate": 2.055e-05, "loss": 3.1052, "step": 550 }, { "epoch": 0.68, "learning_rate": 2.2424999999999996e-05, "loss": 3.0545, "step": 600 }, { "epoch": 0.73, "learning_rate": 2.4299999999999998e-05, "loss": 3.0155, "step": 650 }, { "epoch": 0.79, "learning_rate": 2.6174999999999996e-05, "loss": 3.0148, "step": 700 }, { "epoch": 0.85, "learning_rate": 2.8049999999999997e-05, "loss": 2.9927, "step": 750 }, { "epoch": 0.85, "eval_loss": 3.0182671546936035, "eval_runtime": 435.1578, "eval_samples_per_second": 14.266, "eval_steps_per_second": 0.892, "eval_wer": 1.0, "step": 750 }, { "epoch": 0.9, "learning_rate": 2.9925e-05, "loss": 2.9829, "step": 800 }, { "epoch": 0.96, "learning_rate": 3.1799999999999994e-05, "loss": 2.9876, "step": 850 }, { "epoch": 1.02, "learning_rate": 3.3675e-05, "loss": 3.0304, "step": 900 }, { "epoch": 1.07, "learning_rate": 3.555e-05, "loss": 2.9783, "step": 950 }, { "epoch": 1.13, "learning_rate": 3.7424999999999995e-05, "loss": 2.9591, "step": 1000 }, { "epoch": 1.13, "eval_loss": 2.999102830886841, "eval_runtime": 430.4872, "eval_samples_per_second": 14.421, "eval_steps_per_second": 0.901, "eval_wer": 1.0, "step": 1000 }, { "epoch": 1.19, "learning_rate": 3.93e-05, "loss": 2.9559, "step": 1050 }, { "epoch": 1.24, "learning_rate": 4.1175e-05, "loss": 2.9388, "step": 1100 }, { "epoch": 1.3, "learning_rate": 4.3049999999999996e-05, "loss": 2.9321, "step": 1150 }, { "epoch": 1.36, "learning_rate": 4.4924999999999994e-05, "loss": 2.9205, "step": 1200 }, { "epoch": 1.41, "learning_rate": 4.68e-05, "loss": 2.8989, "step": 1250 }, { "epoch": 1.41, "eval_loss": 2.9000213146209717, "eval_runtime": 431.5059, "eval_samples_per_second": 14.387, "eval_steps_per_second": 0.899, "eval_wer": 0.999990658308967, "step": 1250 }, { "epoch": 1.47, "learning_rate": 4.8675e-05, "loss": 2.8682, "step": 1300 }, { "epoch": 1.52, "learning_rate": 5.055e-05, "loss": 2.8476, "step": 1350 }, { "epoch": 1.58, "learning_rate": 5.2424999999999994e-05, "loss": 2.7956, "step": 1400 }, { "epoch": 1.64, "learning_rate": 5.429999999999999e-05, "loss": 2.6754, "step": 1450 }, { "epoch": 1.69, "learning_rate": 5.6175e-05, "loss": 2.4286, "step": 1500 }, { "epoch": 1.69, "eval_loss": 1.7688498497009277, "eval_runtime": 430.3663, "eval_samples_per_second": 14.425, "eval_steps_per_second": 0.902, "eval_wer": 0.9550384410586005, "step": 1500 }, { "epoch": 1.75, "learning_rate": 5.8049999999999995e-05, "loss": 2.218, "step": 1550 }, { "epoch": 1.81, "learning_rate": 5.9925e-05, "loss": 2.0095, "step": 1600 }, { "epoch": 1.86, "learning_rate": 6.18e-05, "loss": 1.8416, "step": 1650 }, { "epoch": 1.92, "learning_rate": 6.367499999999999e-05, "loss": 1.7642, "step": 1700 }, { "epoch": 1.98, "learning_rate": 6.555e-05, "loss": 1.6765, "step": 1750 }, { "epoch": 1.98, "eval_loss": 0.6841917037963867, "eval_runtime": 433.019, "eval_samples_per_second": 14.337, "eval_steps_per_second": 0.896, "eval_wer": 0.48551570805347183, "step": 1750 }, { "epoch": 2.03, "learning_rate": 6.7425e-05, "loss": 1.5994, "step": 1800 }, { "epoch": 2.09, "learning_rate": 6.93e-05, "loss": 1.5522, "step": 1850 }, { "epoch": 2.15, "learning_rate": 7.1175e-05, "loss": 1.52, "step": 1900 }, { "epoch": 2.2, "learning_rate": 7.304999999999999e-05, "loss": 1.5086, "step": 1950 }, { "epoch": 2.26, "learning_rate": 7.492499999999999e-05, "loss": 1.4521, "step": 2000 }, { "epoch": 2.26, "eval_loss": 0.5096011757850647, "eval_runtime": 431.8266, "eval_samples_per_second": 14.376, "eval_steps_per_second": 0.899, "eval_wer": 0.3735835660971349, "step": 2000 }, { "epoch": 2.32, "learning_rate": 7.477070063694266e-05, "loss": 1.4457, "step": 2050 }, { "epoch": 2.37, "learning_rate": 7.453184713375795e-05, "loss": 1.4276, "step": 2100 }, { "epoch": 2.43, "learning_rate": 7.429299363057323e-05, "loss": 1.4028, "step": 2150 }, { "epoch": 2.49, "learning_rate": 7.405414012738853e-05, "loss": 1.3887, "step": 2200 }, { "epoch": 2.54, "learning_rate": 7.38152866242038e-05, "loss": 1.3589, "step": 2250 }, { "epoch": 2.54, "eval_loss": 0.44788965582847595, "eval_runtime": 430.2855, "eval_samples_per_second": 14.428, "eval_steps_per_second": 0.902, "eval_wer": 0.3335450783300793, "step": 2250 }, { "epoch": 2.6, "learning_rate": 7.35764331210191e-05, "loss": 1.3935, "step": 2300 }, { "epoch": 2.65, "learning_rate": 7.333757961783438e-05, "loss": 1.3425, "step": 2350 }, { "epoch": 2.71, "learning_rate": 7.309872611464967e-05, "loss": 1.3657, "step": 2400 }, { "epoch": 2.77, "learning_rate": 7.285987261146495e-05, "loss": 1.3645, "step": 2450 }, { "epoch": 2.82, "learning_rate": 7.262101910828025e-05, "loss": 1.3136, "step": 2500 }, { "epoch": 2.82, "eval_loss": 0.40564054250717163, "eval_runtime": 428.4501, "eval_samples_per_second": 14.489, "eval_steps_per_second": 0.906, "eval_wer": 0.3123020729212402, "step": 2500 }, { "epoch": 2.88, "learning_rate": 7.238216560509553e-05, "loss": 1.3415, "step": 2550 }, { "epoch": 2.94, "learning_rate": 7.214331210191082e-05, "loss": 1.3345, "step": 2600 }, { "epoch": 2.99, "learning_rate": 7.19044585987261e-05, "loss": 1.3283, "step": 2650 }, { "epoch": 3.05, "learning_rate": 7.16656050955414e-05, "loss": 1.2788, "step": 2700 }, { "epoch": 3.11, "learning_rate": 7.142675159235667e-05, "loss": 1.2856, "step": 2750 }, { "epoch": 3.11, "eval_loss": 0.38699424266815186, "eval_runtime": 430.1514, "eval_samples_per_second": 14.432, "eval_steps_per_second": 0.902, "eval_wer": 0.29870991246835504, "step": 2750 }, { "epoch": 3.16, "learning_rate": 7.118789808917197e-05, "loss": 1.2817, "step": 2800 }, { "epoch": 3.22, "learning_rate": 7.094904458598725e-05, "loss": 1.2502, "step": 2850 }, { "epoch": 3.28, "learning_rate": 7.071019108280254e-05, "loss": 1.2623, "step": 2900 }, { "epoch": 3.33, "learning_rate": 7.047133757961782e-05, "loss": 1.2302, "step": 2950 }, { "epoch": 3.39, "learning_rate": 7.023248407643311e-05, "loss": 1.2283, "step": 3000 }, { "epoch": 3.39, "eval_loss": 0.3645668029785156, "eval_runtime": 430.0013, "eval_samples_per_second": 14.437, "eval_steps_per_second": 0.902, "eval_wer": 0.2828290377124067, "step": 3000 }, { "epoch": 3.45, "learning_rate": 6.99936305732484e-05, "loss": 1.1993, "step": 3050 }, { "epoch": 3.5, "learning_rate": 6.975477707006369e-05, "loss": 1.2627, "step": 3100 }, { "epoch": 3.56, "learning_rate": 6.951592356687897e-05, "loss": 1.1969, "step": 3150 }, { "epoch": 3.62, "learning_rate": 6.927707006369426e-05, "loss": 1.2054, "step": 3200 }, { "epoch": 3.67, "learning_rate": 6.903821656050954e-05, "loss": 1.2053, "step": 3250 }, { "epoch": 3.67, "eval_loss": 0.3499177098274231, "eval_runtime": 429.329, "eval_samples_per_second": 14.46, "eval_steps_per_second": 0.904, "eval_wer": 0.2747578166599718, "step": 3250 }, { "epoch": 3.73, "learning_rate": 6.879936305732483e-05, "loss": 1.2144, "step": 3300 }, { "epoch": 3.78, "learning_rate": 6.856050955414011e-05, "loss": 1.1882, "step": 3350 }, { "epoch": 3.84, "learning_rate": 6.832165605095541e-05, "loss": 1.1901, "step": 3400 }, { "epoch": 3.9, "learning_rate": 6.808280254777069e-05, "loss": 1.2064, "step": 3450 }, { "epoch": 3.95, "learning_rate": 6.784394904458598e-05, "loss": 1.2087, "step": 3500 }, { "epoch": 3.95, "eval_loss": 0.3345482349395752, "eval_runtime": 430.4222, "eval_samples_per_second": 14.423, "eval_steps_per_second": 0.901, "eval_wer": 0.2602781955589601, "step": 3500 }, { "epoch": 4.01, "learning_rate": 6.760509554140126e-05, "loss": 1.1945, "step": 3550 }, { "epoch": 4.07, "learning_rate": 6.736624203821655e-05, "loss": 1.1674, "step": 3600 }, { "epoch": 4.12, "learning_rate": 6.712738853503183e-05, "loss": 1.2197, "step": 3650 }, { "epoch": 4.18, "learning_rate": 6.688853503184713e-05, "loss": 1.1832, "step": 3700 }, { "epoch": 4.24, "learning_rate": 6.664968152866241e-05, "loss": 1.2002, "step": 3750 }, { "epoch": 4.24, "eval_loss": 0.3320307731628418, "eval_runtime": 429.9654, "eval_samples_per_second": 14.438, "eval_steps_per_second": 0.902, "eval_wer": 0.25228170803478844, "step": 3750 }, { "epoch": 4.29, "learning_rate": 6.64108280254777e-05, "loss": 1.1655, "step": 3800 }, { "epoch": 4.35, "learning_rate": 6.617197452229298e-05, "loss": 1.1387, "step": 3850 }, { "epoch": 4.41, "learning_rate": 6.593312101910828e-05, "loss": 1.1344, "step": 3900 }, { "epoch": 4.46, "learning_rate": 6.569426751592356e-05, "loss": 1.169, "step": 3950 }, { "epoch": 4.52, "learning_rate": 6.545541401273885e-05, "loss": 1.1383, "step": 4000 }, { "epoch": 4.52, "eval_loss": 0.31172633171081543, "eval_runtime": 428.4618, "eval_samples_per_second": 14.489, "eval_steps_per_second": 0.906, "eval_wer": 0.24393957794239912, "step": 4000 }, { "epoch": 4.58, "learning_rate": 6.521656050955413e-05, "loss": 1.1241, "step": 4050 }, { "epoch": 4.63, "learning_rate": 6.497770700636942e-05, "loss": 1.1505, "step": 4100 }, { "epoch": 4.69, "learning_rate": 6.47388535031847e-05, "loss": 1.1309, "step": 4150 }, { "epoch": 4.75, "learning_rate": 6.45e-05, "loss": 1.1368, "step": 4200 }, { "epoch": 4.8, "learning_rate": 6.426114649681528e-05, "loss": 1.1364, "step": 4250 }, { "epoch": 4.8, "eval_loss": 0.3198467195034027, "eval_runtime": 427.239, "eval_samples_per_second": 14.531, "eval_steps_per_second": 0.908, "eval_wer": 0.2382878548674881, "step": 4250 }, { "epoch": 4.86, "learning_rate": 6.402229299363057e-05, "loss": 1.1185, "step": 4300 }, { "epoch": 4.91, "learning_rate": 6.378343949044585e-05, "loss": 1.1214, "step": 4350 }, { "epoch": 4.97, "learning_rate": 6.354458598726114e-05, "loss": 1.1188, "step": 4400 }, { "epoch": 5.03, "learning_rate": 6.330573248407642e-05, "loss": 1.1327, "step": 4450 }, { "epoch": 5.08, "learning_rate": 6.306687898089172e-05, "loss": 1.158, "step": 4500 }, { "epoch": 5.08, "eval_loss": 0.3070796728134155, "eval_runtime": 427.2037, "eval_samples_per_second": 14.532, "eval_steps_per_second": 0.908, "eval_wer": 0.23418685250404028, "step": 4500 }, { "epoch": 5.14, "learning_rate": 6.2828025477707e-05, "loss": 1.1221, "step": 4550 }, { "epoch": 5.2, "learning_rate": 6.258917197452229e-05, "loss": 1.1167, "step": 4600 }, { "epoch": 5.25, "learning_rate": 6.235031847133757e-05, "loss": 1.1067, "step": 4650 }, { "epoch": 5.31, "learning_rate": 6.211146496815286e-05, "loss": 1.099, "step": 4700 }, { "epoch": 5.37, "learning_rate": 6.187261146496814e-05, "loss": 1.108, "step": 4750 }, { "epoch": 5.37, "eval_loss": 0.3011206090450287, "eval_runtime": 430.4576, "eval_samples_per_second": 14.422, "eval_steps_per_second": 0.901, "eval_wer": 0.23136566181210122, "step": 4750 }, { "epoch": 5.42, "learning_rate": 6.163375796178344e-05, "loss": 1.1024, "step": 4800 }, { "epoch": 5.48, "learning_rate": 6.139490445859872e-05, "loss": 1.1039, "step": 4850 }, { "epoch": 5.54, "learning_rate": 6.115605095541401e-05, "loss": 1.1082, "step": 4900 }, { "epoch": 5.59, "learning_rate": 6.09171974522293e-05, "loss": 1.0982, "step": 4950 }, { "epoch": 5.65, "learning_rate": 6.0678343949044583e-05, "loss": 1.1025, "step": 5000 }, { "epoch": 5.65, "eval_loss": 0.28753861784935, "eval_runtime": 431.3779, "eval_samples_per_second": 14.391, "eval_steps_per_second": 0.899, "eval_wer": 0.2289368221435444, "step": 5000 }, { "epoch": 5.71, "learning_rate": 6.043949044585987e-05, "loss": 1.089, "step": 5050 }, { "epoch": 5.76, "learning_rate": 6.020063694267516e-05, "loss": 1.0792, "step": 5100 }, { "epoch": 5.82, "learning_rate": 5.9961783439490444e-05, "loss": 1.1054, "step": 5150 }, { "epoch": 5.87, "learning_rate": 5.972770700636942e-05, "loss": 1.078, "step": 5200 }, { "epoch": 5.93, "learning_rate": 5.948885350318471e-05, "loss": 1.0697, "step": 5250 }, { "epoch": 5.93, "eval_loss": 0.29261597990989685, "eval_runtime": 429.3286, "eval_samples_per_second": 14.46, "eval_steps_per_second": 0.904, "eval_wer": 0.22559249675376236, "step": 5250 }, { "epoch": 5.99, "learning_rate": 5.925e-05, "loss": 1.1183, "step": 5300 }, { "epoch": 6.05, "learning_rate": 5.9011146496815284e-05, "loss": 1.1614, "step": 5350 }, { "epoch": 6.1, "learning_rate": 5.877229299363057e-05, "loss": 1.075, "step": 5400 }, { "epoch": 6.16, "learning_rate": 5.853343949044586e-05, "loss": 1.0901, "step": 5450 }, { "epoch": 6.21, "learning_rate": 5.8294585987261144e-05, "loss": 1.0904, "step": 5500 }, { "epoch": 6.21, "eval_loss": 0.2695116698741913, "eval_runtime": 431.1678, "eval_samples_per_second": 14.398, "eval_steps_per_second": 0.9, "eval_wer": 0.22445281044774726, "step": 5500 }, { "epoch": 6.27, "learning_rate": 5.805573248407643e-05, "loss": 1.0577, "step": 5550 }, { "epoch": 6.33, "learning_rate": 5.781687898089172e-05, "loss": 1.0693, "step": 5600 }, { "epoch": 6.38, "learning_rate": 5.7578025477707004e-05, "loss": 1.0784, "step": 5650 }, { "epoch": 6.44, "learning_rate": 5.733917197452229e-05, "loss": 1.0754, "step": 5700 }, { "epoch": 6.5, "learning_rate": 5.710031847133758e-05, "loss": 1.0802, "step": 5750 }, { "epoch": 6.5, "eval_loss": 0.26020729541778564, "eval_runtime": 433.3184, "eval_samples_per_second": 14.327, "eval_steps_per_second": 0.895, "eval_wer": 0.21889450428316534, "step": 5750 }, { "epoch": 6.55, "learning_rate": 5.6861464968152864e-05, "loss": 1.0459, "step": 5800 }, { "epoch": 6.61, "learning_rate": 5.662261146496815e-05, "loss": 1.0492, "step": 5850 }, { "epoch": 6.67, "learning_rate": 5.638375796178344e-05, "loss": 1.0526, "step": 5900 }, { "epoch": 6.72, "learning_rate": 5.6144904458598724e-05, "loss": 1.079, "step": 5950 }, { "epoch": 6.78, "learning_rate": 5.590605095541401e-05, "loss": 1.0882, "step": 6000 }, { "epoch": 6.78, "eval_loss": 0.2602781653404236, "eval_runtime": 434.4762, "eval_samples_per_second": 14.288, "eval_steps_per_second": 0.893, "eval_wer": 0.21684867394695787, "step": 6000 }, { "epoch": 6.84, "learning_rate": 5.56671974522293e-05, "loss": 1.0691, "step": 6050 }, { "epoch": 6.89, "learning_rate": 5.5428343949044585e-05, "loss": 1.0728, "step": 6100 }, { "epoch": 6.95, "learning_rate": 5.518949044585987e-05, "loss": 1.0308, "step": 6150 }, { "epoch": 7.01, "learning_rate": 5.4955414012738844e-05, "loss": 1.0894, "step": 6200 }, { "epoch": 7.06, "learning_rate": 5.471656050955413e-05, "loss": 1.0881, "step": 6250 }, { "epoch": 7.06, "eval_loss": 0.25403761863708496, "eval_runtime": 433.991, "eval_samples_per_second": 14.304, "eval_steps_per_second": 0.894, "eval_wer": 0.2292544396386634, "step": 6250 }, { "epoch": 7.12, "learning_rate": 5.447770700636942e-05, "loss": 1.0295, "step": 6300 }, { "epoch": 7.17, "learning_rate": 5.4238853503184704e-05, "loss": 1.0389, "step": 6350 }, { "epoch": 7.23, "learning_rate": 5.399999999999999e-05, "loss": 1.0415, "step": 6400 }, { "epoch": 7.29, "learning_rate": 5.376114649681528e-05, "loss": 1.0492, "step": 6450 }, { "epoch": 7.34, "learning_rate": 5.3522292993630565e-05, "loss": 1.0378, "step": 6500 }, { "epoch": 7.34, "eval_loss": 0.2614484429359436, "eval_runtime": 432.0675, "eval_samples_per_second": 14.368, "eval_steps_per_second": 0.898, "eval_wer": 0.21932422207067923, "step": 6500 }, { "epoch": 7.4, "learning_rate": 5.328343949044585e-05, "loss": 1.0362, "step": 6550 }, { "epoch": 7.46, "learning_rate": 5.304458598726114e-05, "loss": 1.0444, "step": 6600 }, { "epoch": 7.51, "learning_rate": 5.2805732484076425e-05, "loss": 1.0626, "step": 6650 }, { "epoch": 7.57, "learning_rate": 5.256687898089171e-05, "loss": 1.0307, "step": 6700 }, { "epoch": 7.63, "learning_rate": 5.2328025477707e-05, "loss": 1.0397, "step": 6750 }, { "epoch": 7.63, "eval_loss": 0.27073222398757935, "eval_runtime": 432.0598, "eval_samples_per_second": 14.368, "eval_steps_per_second": 0.898, "eval_wer": 0.21041224882528237, "step": 6750 }, { "epoch": 7.68, "learning_rate": 5.2089171974522285e-05, "loss": 1.0481, "step": 6800 }, { "epoch": 7.74, "learning_rate": 5.185031847133757e-05, "loss": 1.042, "step": 6850 }, { "epoch": 7.8, "learning_rate": 5.161146496815286e-05, "loss": 1.0298, "step": 6900 }, { "epoch": 7.85, "learning_rate": 5.1372611464968145e-05, "loss": 1.0269, "step": 6950 }, { "epoch": 7.91, "learning_rate": 5.113375796178343e-05, "loss": 1.0296, "step": 7000 }, { "epoch": 7.91, "eval_loss": 0.248311385512352, "eval_runtime": 431.8203, "eval_samples_per_second": 14.376, "eval_steps_per_second": 0.899, "eval_wer": 0.2119256027726139, "step": 7000 }, { "epoch": 7.97, "learning_rate": 5.089490445859872e-05, "loss": 1.0276, "step": 7050 }, { "epoch": 8.02, "learning_rate": 5.0656050955414005e-05, "loss": 1.0481, "step": 7100 }, { "epoch": 8.08, "learning_rate": 5.041719745222929e-05, "loss": 1.006, "step": 7150 }, { "epoch": 8.14, "learning_rate": 5.017834394904458e-05, "loss": 1.0215, "step": 7200 }, { "epoch": 8.19, "learning_rate": 4.9939490445859866e-05, "loss": 1.0249, "step": 7250 }, { "epoch": 8.19, "eval_loss": 0.24828839302062988, "eval_runtime": 429.7696, "eval_samples_per_second": 14.445, "eval_steps_per_second": 0.903, "eval_wer": 0.20468579222210806, "step": 7250 }, { "epoch": 8.25, "learning_rate": 4.970063694267515e-05, "loss": 1.0109, "step": 7300 }, { "epoch": 8.3, "learning_rate": 4.946178343949044e-05, "loss": 1.0154, "step": 7350 }, { "epoch": 8.36, "learning_rate": 4.9222929936305726e-05, "loss": 1.0123, "step": 7400 }, { "epoch": 8.42, "learning_rate": 4.898407643312101e-05, "loss": 1.0126, "step": 7450 }, { "epoch": 8.47, "learning_rate": 4.87452229299363e-05, "loss": 1.013, "step": 7500 }, { "epoch": 8.47, "eval_loss": 0.24869437515735626, "eval_runtime": 430.836, "eval_samples_per_second": 14.409, "eval_steps_per_second": 0.901, "eval_wer": 0.20419068259736378, "step": 7500 }, { "epoch": 8.53, "learning_rate": 4.8506369426751586e-05, "loss": 1.0077, "step": 7550 }, { "epoch": 8.59, "learning_rate": 4.826751592356687e-05, "loss": 1.0256, "step": 7600 }, { "epoch": 8.64, "learning_rate": 4.802866242038216e-05, "loss": 1.0627, "step": 7650 }, { "epoch": 8.7, "learning_rate": 4.7789808917197446e-05, "loss": 0.9883, "step": 7700 }, { "epoch": 8.76, "learning_rate": 4.755095541401273e-05, "loss": 1.0064, "step": 7750 }, { "epoch": 8.76, "eval_loss": 0.24558775126934052, "eval_runtime": 430.9773, "eval_samples_per_second": 14.404, "eval_steps_per_second": 0.9, "eval_wer": 0.20164974263641205, "step": 7750 }, { "epoch": 8.81, "learning_rate": 4.731210191082802e-05, "loss": 1.0137, "step": 7800 }, { "epoch": 8.87, "learning_rate": 4.7073248407643306e-05, "loss": 1.0178, "step": 7850 }, { "epoch": 8.93, "learning_rate": 4.683439490445859e-05, "loss": 1.0035, "step": 7900 }, { "epoch": 8.98, "learning_rate": 4.659554140127388e-05, "loss": 1.0457, "step": 7950 }, { "epoch": 9.04, "learning_rate": 4.6356687898089167e-05, "loss": 1.0668, "step": 8000 }, { "epoch": 9.04, "eval_loss": 0.2397284209728241, "eval_runtime": 430.6925, "eval_samples_per_second": 14.414, "eval_steps_per_second": 0.901, "eval_wer": 0.19949181200780966, "step": 8000 }, { "epoch": 9.1, "learning_rate": 4.611783439490445e-05, "loss": 1.0054, "step": 8050 }, { "epoch": 9.15, "learning_rate": 4.587898089171974e-05, "loss": 1.0224, "step": 8100 }, { "epoch": 9.21, "learning_rate": 4.564012738853503e-05, "loss": 1.0019, "step": 8150 }, { "epoch": 9.27, "learning_rate": 4.5401273885350314e-05, "loss": 1.0033, "step": 8200 }, { "epoch": 9.32, "learning_rate": 4.51624203821656e-05, "loss": 1.0129, "step": 8250 }, { "epoch": 9.32, "eval_loss": 0.23742474615573883, "eval_runtime": 432.9935, "eval_samples_per_second": 14.337, "eval_steps_per_second": 0.896, "eval_wer": 0.19942642017057927, "step": 8250 }, { "epoch": 9.38, "learning_rate": 4.492356687898089e-05, "loss": 0.9864, "step": 8300 }, { "epoch": 9.43, "learning_rate": 4.4689490445859874e-05, "loss": 1.0021, "step": 8350 }, { "epoch": 9.49, "learning_rate": 4.445063694267516e-05, "loss": 1.0073, "step": 8400 }, { "epoch": 9.55, "learning_rate": 4.421178343949045e-05, "loss": 0.9999, "step": 8450 }, { "epoch": 9.6, "learning_rate": 4.3972929936305734e-05, "loss": 1.0164, "step": 8500 }, { "epoch": 9.6, "eval_loss": 0.2206413298845291, "eval_runtime": 431.5354, "eval_samples_per_second": 14.386, "eval_steps_per_second": 0.899, "eval_wer": 0.19915551113062488, "step": 8500 }, { "epoch": 9.66, "learning_rate": 4.373407643312102e-05, "loss": 0.9956, "step": 8550 }, { "epoch": 9.72, "learning_rate": 4.349522292993631e-05, "loss": 0.9662, "step": 8600 }, { "epoch": 9.77, "learning_rate": 4.3256369426751594e-05, "loss": 0.9781, "step": 8650 }, { "epoch": 9.83, "learning_rate": 4.301751592356688e-05, "loss": 0.9863, "step": 8700 }, { "epoch": 9.89, "learning_rate": 4.277866242038217e-05, "loss": 0.975, "step": 8750 }, { "epoch": 9.89, "eval_loss": 0.22473624348640442, "eval_runtime": 432.1534, "eval_samples_per_second": 14.365, "eval_steps_per_second": 0.898, "eval_wer": 0.19731519799714145, "step": 8750 }, { "epoch": 9.94, "learning_rate": 4.2539808917197454e-05, "loss": 0.9931, "step": 8800 }, { "epoch": 10.0, "learning_rate": 4.230095541401274e-05, "loss": 1.0101, "step": 8850 }, { "epoch": 10.06, "learning_rate": 4.206210191082803e-05, "loss": 1.0034, "step": 8900 }, { "epoch": 10.11, "learning_rate": 4.1823248407643314e-05, "loss": 1.0018, "step": 8950 }, { "epoch": 10.17, "learning_rate": 4.15843949044586e-05, "loss": 0.9849, "step": 9000 }, { "epoch": 10.17, "eval_loss": 0.23245184123516083, "eval_runtime": 431.4778, "eval_samples_per_second": 14.388, "eval_steps_per_second": 0.899, "eval_wer": 0.19526002596990108, "step": 9000 }, { "epoch": 10.23, "learning_rate": 4.134554140127389e-05, "loss": 0.9953, "step": 9050 }, { "epoch": 10.28, "learning_rate": 4.1106687898089175e-05, "loss": 0.9639, "step": 9100 }, { "epoch": 10.34, "learning_rate": 4.086783439490446e-05, "loss": 0.9862, "step": 9150 }, { "epoch": 10.4, "learning_rate": 4.062898089171975e-05, "loss": 1.0222, "step": 9200 }, { "epoch": 10.45, "learning_rate": 4.0390127388535035e-05, "loss": 0.9826, "step": 9250 }, { "epoch": 10.45, "eval_loss": 0.2301308959722519, "eval_runtime": 432.6762, "eval_samples_per_second": 14.348, "eval_steps_per_second": 0.897, "eval_wer": 0.1933730043812531, "step": 9250 }, { "epoch": 10.51, "learning_rate": 4.015127388535032e-05, "loss": 0.9867, "step": 9300 }, { "epoch": 10.56, "learning_rate": 3.991242038216561e-05, "loss": 0.9687, "step": 9350 }, { "epoch": 10.62, "learning_rate": 3.9673566878980895e-05, "loss": 0.9715, "step": 9400 }, { "epoch": 10.68, "learning_rate": 3.943471337579618e-05, "loss": 0.9914, "step": 9450 }, { "epoch": 10.73, "learning_rate": 3.919585987261147e-05, "loss": 0.9835, "step": 9500 }, { "epoch": 10.73, "eval_loss": 0.2191852629184723, "eval_runtime": 439.1976, "eval_samples_per_second": 14.135, "eval_steps_per_second": 0.883, "eval_wer": 0.19420441488318216, "step": 9500 }, { "epoch": 10.79, "learning_rate": 3.8957006369426755e-05, "loss": 0.9652, "step": 9550 }, { "epoch": 10.85, "learning_rate": 3.871815286624204e-05, "loss": 0.9614, "step": 9600 }, { "epoch": 10.9, "learning_rate": 3.847929936305733e-05, "loss": 0.97, "step": 9650 }, { "epoch": 10.96, "learning_rate": 3.8240445859872615e-05, "loss": 0.9764, "step": 9700 }, { "epoch": 11.02, "learning_rate": 3.80015923566879e-05, "loss": 0.9676, "step": 9750 }, { "epoch": 11.02, "eval_loss": 0.2265927493572235, "eval_runtime": 430.7748, "eval_samples_per_second": 14.411, "eval_steps_per_second": 0.901, "eval_wer": 0.19133651573607854, "step": 9750 }, { "epoch": 11.07, "learning_rate": 3.776273885350319e-05, "loss": 0.9609, "step": 9800 }, { "epoch": 11.13, "learning_rate": 3.7523885350318475e-05, "loss": 0.9721, "step": 9850 }, { "epoch": 11.19, "learning_rate": 3.7285031847133755e-05, "loss": 0.9669, "step": 9900 }, { "epoch": 11.24, "learning_rate": 3.704617834394904e-05, "loss": 0.9643, "step": 9950 }, { "epoch": 11.3, "learning_rate": 3.680732484076433e-05, "loss": 0.9627, "step": 10000 }, { "epoch": 11.3, "eval_loss": 0.2193416953086853, "eval_runtime": 432.6083, "eval_samples_per_second": 14.35, "eval_steps_per_second": 0.897, "eval_wer": 0.19205582594561268, "step": 10000 }, { "epoch": 11.36, "learning_rate": 3.6568471337579616e-05, "loss": 1.0179, "step": 10050 }, { "epoch": 11.41, "learning_rate": 3.63296178343949e-05, "loss": 0.9575, "step": 10100 }, { "epoch": 11.47, "learning_rate": 3.609076433121019e-05, "loss": 0.98, "step": 10150 }, { "epoch": 11.52, "learning_rate": 3.5851910828025476e-05, "loss": 0.9542, "step": 10200 }, { "epoch": 11.58, "learning_rate": 3.561305732484076e-05, "loss": 0.976, "step": 10250 }, { "epoch": 11.58, "eval_loss": 0.23090308904647827, "eval_runtime": 432.1501, "eval_samples_per_second": 14.365, "eval_steps_per_second": 0.898, "eval_wer": 0.1881790241669547, "step": 10250 }, { "epoch": 11.64, "learning_rate": 3.537420382165605e-05, "loss": 0.972, "step": 10300 }, { "epoch": 11.69, "learning_rate": 3.5135350318471336e-05, "loss": 0.9634, "step": 10350 }, { "epoch": 11.75, "learning_rate": 3.489649681528662e-05, "loss": 0.9682, "step": 10400 }, { "epoch": 11.81, "learning_rate": 3.465764331210191e-05, "loss": 0.9638, "step": 10450 }, { "epoch": 11.86, "learning_rate": 3.4418789808917196e-05, "loss": 0.969, "step": 10500 }, { "epoch": 11.86, "eval_loss": 0.2268366813659668, "eval_runtime": 433.2795, "eval_samples_per_second": 14.328, "eval_steps_per_second": 0.895, "eval_wer": 0.18859005857240277, "step": 10500 }, { "epoch": 11.92, "learning_rate": 3.417993630573248e-05, "loss": 0.9698, "step": 10550 }, { "epoch": 11.98, "learning_rate": 3.394108280254777e-05, "loss": 0.9369, "step": 10600 }, { "epoch": 12.03, "learning_rate": 3.3702229299363056e-05, "loss": 0.9699, "step": 10650 }, { "epoch": 12.09, "learning_rate": 3.346337579617834e-05, "loss": 1.0013, "step": 10700 }, { "epoch": 12.15, "learning_rate": 3.322929936305732e-05, "loss": 0.9611, "step": 10750 }, { "epoch": 12.15, "eval_loss": 0.2322191596031189, "eval_runtime": 429.3587, "eval_samples_per_second": 14.459, "eval_steps_per_second": 0.904, "eval_wer": 0.18626397750520798, "step": 10750 }, { "epoch": 12.2, "learning_rate": 3.299044585987261e-05, "loss": 0.9418, "step": 10800 }, { "epoch": 12.26, "learning_rate": 3.2751592356687896e-05, "loss": 0.9582, "step": 10850 }, { "epoch": 12.32, "learning_rate": 3.251273885350318e-05, "loss": 0.945, "step": 10900 }, { "epoch": 12.37, "learning_rate": 3.227388535031847e-05, "loss": 0.9386, "step": 10950 }, { "epoch": 12.43, "learning_rate": 3.2035031847133757e-05, "loss": 0.9397, "step": 11000 }, { "epoch": 12.43, "eval_loss": 0.21969455480575562, "eval_runtime": 432.2628, "eval_samples_per_second": 14.362, "eval_steps_per_second": 0.898, "eval_wer": 0.1843676142255271, "step": 11000 }, { "epoch": 12.49, "learning_rate": 3.179617834394904e-05, "loss": 0.9594, "step": 11050 }, { "epoch": 12.54, "learning_rate": 3.155732484076433e-05, "loss": 0.9467, "step": 11100 }, { "epoch": 12.6, "learning_rate": 3.131847133757962e-05, "loss": 0.9609, "step": 11150 }, { "epoch": 12.65, "learning_rate": 3.1079617834394904e-05, "loss": 0.9446, "step": 11200 }, { "epoch": 12.71, "learning_rate": 3.084076433121019e-05, "loss": 0.9601, "step": 11250 }, { "epoch": 12.71, "eval_loss": 0.22107979655265808, "eval_runtime": 432.9535, "eval_samples_per_second": 14.339, "eval_steps_per_second": 0.896, "eval_wer": 0.18711407138920289, "step": 11250 }, { "epoch": 12.77, "learning_rate": 3.060191082802548e-05, "loss": 0.9497, "step": 11300 }, { "epoch": 12.82, "learning_rate": 3.036305732484076e-05, "loss": 0.939, "step": 11350 }, { "epoch": 12.88, "learning_rate": 3.0124203821656047e-05, "loss": 0.9462, "step": 11400 }, { "epoch": 12.94, "learning_rate": 2.9885350318471334e-05, "loss": 0.9243, "step": 11450 }, { "epoch": 12.99, "learning_rate": 2.964649681528662e-05, "loss": 0.9718, "step": 11500 }, { "epoch": 12.99, "eval_loss": 0.20792651176452637, "eval_runtime": 429.7801, "eval_samples_per_second": 14.445, "eval_steps_per_second": 0.903, "eval_wer": 0.189823161788747, "step": 11500 }, { "epoch": 13.05, "learning_rate": 2.9407643312101907e-05, "loss": 0.9543, "step": 11550 }, { "epoch": 13.11, "learning_rate": 2.9168789808917194e-05, "loss": 0.9386, "step": 11600 }, { "epoch": 13.16, "learning_rate": 2.892993630573248e-05, "loss": 0.9662, "step": 11650 }, { "epoch": 13.22, "learning_rate": 2.8691082802547767e-05, "loss": 0.9426, "step": 11700 }, { "epoch": 13.28, "learning_rate": 2.8452229299363054e-05, "loss": 0.9347, "step": 11750 }, { "epoch": 13.28, "eval_loss": 0.2053879350423813, "eval_runtime": 427.5266, "eval_samples_per_second": 14.521, "eval_steps_per_second": 0.908, "eval_wer": 0.1842835390062309, "step": 11750 }, { "epoch": 13.33, "learning_rate": 2.821337579617834e-05, "loss": 0.9579, "step": 11800 }, { "epoch": 13.39, "learning_rate": 2.7974522292993628e-05, "loss": 0.9313, "step": 11850 }, { "epoch": 13.45, "learning_rate": 2.7735668789808914e-05, "loss": 0.9295, "step": 11900 }, { "epoch": 13.5, "learning_rate": 2.74968152866242e-05, "loss": 0.9437, "step": 11950 }, { "epoch": 13.56, "learning_rate": 2.7257961783439488e-05, "loss": 0.9377, "step": 12000 }, { "epoch": 13.56, "eval_loss": 0.20305366814136505, "eval_runtime": 429.8935, "eval_samples_per_second": 14.441, "eval_steps_per_second": 0.903, "eval_wer": 0.18423683055106635, "step": 12000 }, { "epoch": 13.62, "learning_rate": 2.7019108280254775e-05, "loss": 0.9273, "step": 12050 }, { "epoch": 13.67, "learning_rate": 2.678025477707006e-05, "loss": 0.9804, "step": 12100 }, { "epoch": 13.73, "learning_rate": 2.6541401273885348e-05, "loss": 0.9392, "step": 12150 }, { "epoch": 13.78, "learning_rate": 2.6302547770700635e-05, "loss": 0.9379, "step": 12200 }, { "epoch": 13.84, "learning_rate": 2.606369426751592e-05, "loss": 0.934, "step": 12250 }, { "epoch": 13.84, "eval_loss": 0.20586800575256348, "eval_runtime": 428.3313, "eval_samples_per_second": 14.493, "eval_steps_per_second": 0.906, "eval_wer": 0.18060291273926407, "step": 12250 }, { "epoch": 13.9, "learning_rate": 2.5824840764331208e-05, "loss": 0.9177, "step": 12300 }, { "epoch": 13.95, "learning_rate": 2.5585987261146495e-05, "loss": 0.9369, "step": 12350 }, { "epoch": 14.01, "learning_rate": 2.534713375796178e-05, "loss": 0.9438, "step": 12400 }, { "epoch": 14.07, "learning_rate": 2.510828025477707e-05, "loss": 0.9341, "step": 12450 }, { "epoch": 14.12, "learning_rate": 2.4869426751592355e-05, "loss": 0.9295, "step": 12500 }, { "epoch": 14.12, "eval_loss": 0.21221554279327393, "eval_runtime": 432.7246, "eval_samples_per_second": 14.346, "eval_steps_per_second": 0.897, "eval_wer": 0.18605846030248396, "step": 12500 }, { "epoch": 14.18, "learning_rate": 2.4630573248407642e-05, "loss": 0.9239, "step": 12550 }, { "epoch": 14.24, "learning_rate": 2.439171974522293e-05, "loss": 0.9235, "step": 12600 }, { "epoch": 14.29, "learning_rate": 2.4152866242038215e-05, "loss": 0.9631, "step": 12650 }, { "epoch": 14.35, "learning_rate": 2.3914012738853502e-05, "loss": 0.9467, "step": 12700 }, { "epoch": 14.41, "learning_rate": 2.367515923566879e-05, "loss": 0.935, "step": 12750 }, { "epoch": 14.41, "eval_loss": 0.20723822712898254, "eval_runtime": 429.493, "eval_samples_per_second": 14.454, "eval_steps_per_second": 0.903, "eval_wer": 0.17866918269545154, "step": 12750 }, { "epoch": 14.46, "learning_rate": 2.3436305732484076e-05, "loss": 0.9319, "step": 12800 }, { "epoch": 14.52, "learning_rate": 2.3197452229299362e-05, "loss": 0.9337, "step": 12850 }, { "epoch": 14.58, "learning_rate": 2.295859872611465e-05, "loss": 0.9259, "step": 12900 }, { "epoch": 14.63, "learning_rate": 2.2719745222929936e-05, "loss": 0.9228, "step": 12950 }, { "epoch": 14.69, "learning_rate": 2.2480891719745222e-05, "loss": 0.9021, "step": 13000 }, { "epoch": 14.69, "eval_loss": 0.21045178174972534, "eval_runtime": 428.8167, "eval_samples_per_second": 14.477, "eval_steps_per_second": 0.905, "eval_wer": 0.1781273646155427, "step": 13000 }, { "epoch": 14.75, "learning_rate": 2.224203821656051e-05, "loss": 0.9238, "step": 13050 }, { "epoch": 14.8, "learning_rate": 2.2003184713375796e-05, "loss": 0.9373, "step": 13100 }, { "epoch": 14.86, "learning_rate": 2.1764331210191083e-05, "loss": 0.9365, "step": 13150 }, { "epoch": 14.91, "learning_rate": 2.152547770700637e-05, "loss": 0.9656, "step": 13200 }, { "epoch": 14.97, "learning_rate": 2.1286624203821656e-05, "loss": 0.9193, "step": 13250 }, { "epoch": 14.97, "eval_loss": 0.20348267257213593, "eval_runtime": 430.2042, "eval_samples_per_second": 14.43, "eval_steps_per_second": 0.902, "eval_wer": 0.17860379085822115, "step": 13250 }, { "epoch": 15.03, "learning_rate": 2.1047770700636943e-05, "loss": 0.9366, "step": 13300 }, { "epoch": 15.08, "learning_rate": 2.080891719745223e-05, "loss": 0.9129, "step": 13350 }, { "epoch": 15.14, "learning_rate": 2.0570063694267513e-05, "loss": 0.9032, "step": 13400 }, { "epoch": 15.2, "learning_rate": 2.03312101910828e-05, "loss": 0.9152, "step": 13450 }, { "epoch": 15.25, "learning_rate": 2.0092356687898086e-05, "loss": 0.9214, "step": 13500 }, { "epoch": 15.25, "eval_loss": 0.2034832239151001, "eval_runtime": 432.039, "eval_samples_per_second": 14.369, "eval_steps_per_second": 0.898, "eval_wer": 0.17661401066821117, "step": 13500 }, { "epoch": 15.31, "learning_rate": 1.9853503184713373e-05, "loss": 0.9438, "step": 13550 }, { "epoch": 15.37, "learning_rate": 1.961464968152866e-05, "loss": 0.9262, "step": 13600 }, { "epoch": 15.42, "learning_rate": 1.9375796178343947e-05, "loss": 0.9157, "step": 13650 }, { "epoch": 15.48, "learning_rate": 1.9136942675159233e-05, "loss": 0.9299, "step": 13700 }, { "epoch": 15.54, "learning_rate": 1.889808917197452e-05, "loss": 0.9048, "step": 13750 }, { "epoch": 15.54, "eval_loss": 0.19639889895915985, "eval_runtime": 438.8483, "eval_samples_per_second": 14.146, "eval_steps_per_second": 0.884, "eval_wer": 0.17581062523938082, "step": 13750 }, { "epoch": 15.59, "learning_rate": 1.8659235668789807e-05, "loss": 0.9399, "step": 13800 }, { "epoch": 15.65, "learning_rate": 1.8420382165605094e-05, "loss": 0.9309, "step": 13850 }, { "epoch": 15.71, "learning_rate": 1.818152866242038e-05, "loss": 0.9646, "step": 13900 }, { "epoch": 15.76, "learning_rate": 1.7942675159235667e-05, "loss": 0.9095, "step": 13950 }, { "epoch": 15.82, "learning_rate": 1.7703821656050954e-05, "loss": 0.9006, "step": 14000 }, { "epoch": 15.82, "eval_loss": 0.19844159483909607, "eval_runtime": 435.4721, "eval_samples_per_second": 14.256, "eval_steps_per_second": 0.891, "eval_wer": 0.17574523340215045, "step": 14000 }, { "epoch": 15.87, "learning_rate": 1.746496815286624e-05, "loss": 0.8845, "step": 14050 }, { "epoch": 15.93, "learning_rate": 1.7226114649681527e-05, "loss": 0.8991, "step": 14100 }, { "epoch": 15.99, "learning_rate": 1.6987261146496814e-05, "loss": 0.9266, "step": 14150 }, { "epoch": 16.05, "learning_rate": 1.67484076433121e-05, "loss": 0.9535, "step": 14200 }, { "epoch": 16.1, "learning_rate": 1.6509554140127387e-05, "loss": 0.9027, "step": 14250 }, { "epoch": 16.1, "eval_loss": 0.20223206281661987, "eval_runtime": 434.737, "eval_samples_per_second": 14.28, "eval_steps_per_second": 0.892, "eval_wer": 0.17431595467411512, "step": 14250 }, { "epoch": 16.16, "learning_rate": 1.6270700636942674e-05, "loss": 0.9095, "step": 14300 }, { "epoch": 16.21, "learning_rate": 1.603184713375796e-05, "loss": 0.9024, "step": 14350 }, { "epoch": 16.27, "learning_rate": 1.5792993630573248e-05, "loss": 0.9135, "step": 14400 }, { "epoch": 16.33, "learning_rate": 1.5554140127388534e-05, "loss": 0.9013, "step": 14450 }, { "epoch": 16.38, "learning_rate": 1.531528662420382e-05, "loss": 0.9083, "step": 14500 }, { "epoch": 16.38, "eval_loss": 0.19693595170974731, "eval_runtime": 437.2683, "eval_samples_per_second": 14.197, "eval_steps_per_second": 0.887, "eval_wer": 0.1744000298934113, "step": 14500 }, { "epoch": 16.44, "learning_rate": 1.5076433121019106e-05, "loss": 0.9173, "step": 14550 }, { "epoch": 16.5, "learning_rate": 1.4837579617834393e-05, "loss": 0.9133, "step": 14600 }, { "epoch": 16.55, "learning_rate": 1.459872611464968e-05, "loss": 0.9161, "step": 14650 }, { "epoch": 16.61, "learning_rate": 1.4359872611464966e-05, "loss": 0.8844, "step": 14700 }, { "epoch": 16.67, "learning_rate": 1.4121019108280253e-05, "loss": 0.9761, "step": 14750 }, { "epoch": 16.67, "eval_loss": 0.19631367921829224, "eval_runtime": 434.2237, "eval_samples_per_second": 14.297, "eval_steps_per_second": 0.894, "eval_wer": 0.17276523396265192, "step": 14750 }, { "epoch": 16.72, "learning_rate": 1.388216560509554e-05, "loss": 0.9057, "step": 14800 }, { "epoch": 16.78, "learning_rate": 1.3643312101910826e-05, "loss": 0.9128, "step": 14850 }, { "epoch": 16.84, "learning_rate": 1.3404458598726113e-05, "loss": 0.9056, "step": 14900 }, { "epoch": 16.89, "learning_rate": 1.31656050955414e-05, "loss": 0.9024, "step": 14950 }, { "epoch": 16.95, "learning_rate": 1.2926751592356687e-05, "loss": 0.9311, "step": 15000 }, { "epoch": 16.95, "eval_loss": 0.19600756466388702, "eval_runtime": 438.9128, "eval_samples_per_second": 14.144, "eval_steps_per_second": 0.884, "eval_wer": 0.1736807196838772, "step": 15000 }, { "epoch": 17.01, "learning_rate": 1.2687898089171973e-05, "loss": 0.9372, "step": 15050 }, { "epoch": 17.06, "learning_rate": 1.244904458598726e-05, "loss": 0.8955, "step": 15100 }, { "epoch": 17.12, "learning_rate": 1.2210191082802547e-05, "loss": 0.909, "step": 15150 }, { "epoch": 17.17, "learning_rate": 1.1971337579617834e-05, "loss": 0.9092, "step": 15200 }, { "epoch": 17.23, "learning_rate": 1.173248407643312e-05, "loss": 0.886, "step": 15250 }, { "epoch": 17.23, "eval_loss": 0.1928754597902298, "eval_runtime": 438.297, "eval_samples_per_second": 14.164, "eval_steps_per_second": 0.885, "eval_wer": 0.17263445028819116, "step": 15250 }, { "epoch": 17.29, "learning_rate": 1.1493630573248407e-05, "loss": 0.9053, "step": 15300 }, { "epoch": 17.34, "learning_rate": 1.1254777070063694e-05, "loss": 0.9056, "step": 15350 }, { "epoch": 17.4, "learning_rate": 1.101592356687898e-05, "loss": 0.9219, "step": 15400 }, { "epoch": 17.46, "learning_rate": 1.0777070063694267e-05, "loss": 0.8967, "step": 15450 }, { "epoch": 17.51, "learning_rate": 1.0538216560509554e-05, "loss": 0.8969, "step": 15500 }, { "epoch": 17.51, "eval_loss": 0.1928360015153885, "eval_runtime": 442.1109, "eval_samples_per_second": 14.042, "eval_steps_per_second": 0.878, "eval_wer": 0.17337244387979112, "step": 15500 }, { "epoch": 17.57, "learning_rate": 1.029936305732484e-05, "loss": 0.8899, "step": 15550 }, { "epoch": 17.63, "learning_rate": 1.0060509554140127e-05, "loss": 0.9056, "step": 15600 }, { "epoch": 17.68, "learning_rate": 9.821656050955414e-06, "loss": 0.9048, "step": 15650 }, { "epoch": 17.74, "learning_rate": 9.582802547770701e-06, "loss": 0.9572, "step": 15700 }, { "epoch": 17.8, "learning_rate": 9.34872611464968e-06, "loss": 0.9084, "step": 15750 }, { "epoch": 17.8, "eval_loss": 0.19373278319835663, "eval_runtime": 446.3693, "eval_samples_per_second": 13.908, "eval_steps_per_second": 0.869, "eval_wer": 0.17133595523461656, "step": 15750 }, { "epoch": 17.85, "learning_rate": 9.109872611464967e-06, "loss": 0.8861, "step": 15800 }, { "epoch": 17.91, "learning_rate": 8.871019108280254e-06, "loss": 0.8842, "step": 15850 }, { "epoch": 17.97, "learning_rate": 8.63216560509554e-06, "loss": 0.8949, "step": 15900 }, { "epoch": 18.02, "learning_rate": 8.398089171974522e-06, "loss": 0.8977, "step": 15950 }, { "epoch": 18.08, "learning_rate": 8.159235668789809e-06, "loss": 0.8795, "step": 16000 }, { "epoch": 18.08, "eval_loss": 0.1977699100971222, "eval_runtime": 437.7611, "eval_samples_per_second": 14.181, "eval_steps_per_second": 0.886, "eval_wer": 0.17086887068297102, "step": 16000 }, { "epoch": 18.14, "learning_rate": 7.920382165605094e-06, "loss": 0.8984, "step": 16050 }, { "epoch": 18.19, "learning_rate": 7.68152866242038e-06, "loss": 0.9005, "step": 16100 }, { "epoch": 18.25, "learning_rate": 7.4426751592356675e-06, "loss": 0.8981, "step": 16150 }, { "epoch": 18.3, "learning_rate": 7.203821656050954e-06, "loss": 0.9029, "step": 16200 }, { "epoch": 18.36, "learning_rate": 6.964968152866241e-06, "loss": 0.8883, "step": 16250 }, { "epoch": 18.36, "eval_loss": 0.19563348591327667, "eval_runtime": 434.7761, "eval_samples_per_second": 14.279, "eval_steps_per_second": 0.892, "eval_wer": 0.17032705260306222, "step": 16250 }, { "epoch": 18.42, "learning_rate": 6.726114649681528e-06, "loss": 0.8919, "step": 16300 }, { "epoch": 18.47, "learning_rate": 6.4872611464968145e-06, "loss": 0.8978, "step": 16350 }, { "epoch": 18.53, "learning_rate": 6.248407643312101e-06, "loss": 0.8897, "step": 16400 }, { "epoch": 18.59, "learning_rate": 6.009554140127388e-06, "loss": 0.9477, "step": 16450 }, { "epoch": 18.64, "learning_rate": 5.770700636942675e-06, "loss": 0.8901, "step": 16500 }, { "epoch": 18.64, "eval_loss": 0.19332656264305115, "eval_runtime": 439.133, "eval_samples_per_second": 14.137, "eval_steps_per_second": 0.884, "eval_wer": 0.17053256980578624, "step": 16500 }, { "epoch": 18.7, "learning_rate": 5.531847133757961e-06, "loss": 0.8992, "step": 16550 }, { "epoch": 18.76, "learning_rate": 5.292993630573248e-06, "loss": 0.8988, "step": 16600 }, { "epoch": 18.81, "learning_rate": 5.054140127388535e-06, "loss": 0.8885, "step": 16650 }, { "epoch": 18.87, "learning_rate": 4.815286624203822e-06, "loss": 0.8837, "step": 16700 }, { "epoch": 18.93, "learning_rate": 4.576433121019108e-06, "loss": 0.8922, "step": 16750 }, { "epoch": 18.93, "eval_loss": 0.1962379515171051, "eval_runtime": 444.5287, "eval_samples_per_second": 13.965, "eval_steps_per_second": 0.873, "eval_wer": 0.17109307126776088, "step": 16750 }, { "epoch": 18.98, "learning_rate": 4.337579617834394e-06, "loss": 0.8943, "step": 16800 }, { "epoch": 19.04, "learning_rate": 4.098726114649681e-06, "loss": 0.9171, "step": 16850 }, { "epoch": 19.1, "learning_rate": 3.859872611464968e-06, "loss": 0.9144, "step": 16900 }, { "epoch": 19.15, "learning_rate": 3.6210191082802544e-06, "loss": 0.9517, "step": 16950 }, { "epoch": 19.21, "learning_rate": 3.382165605095541e-06, "loss": 0.8765, "step": 17000 }, { "epoch": 19.21, "eval_loss": 0.19622743129730225, "eval_runtime": 445.8046, "eval_samples_per_second": 13.925, "eval_steps_per_second": 0.87, "eval_wer": 0.17106504619466215, "step": 17000 }, { "epoch": 19.27, "learning_rate": 3.143312101910828e-06, "loss": 0.9072, "step": 17050 }, { "epoch": 19.32, "learning_rate": 2.9044585987261146e-06, "loss": 0.8897, "step": 17100 }, { "epoch": 19.38, "learning_rate": 2.6656050955414013e-06, "loss": 0.8879, "step": 17150 }, { "epoch": 19.43, "learning_rate": 2.426751592356688e-06, "loss": 0.883, "step": 17200 }, { "epoch": 19.49, "learning_rate": 2.1878980891719744e-06, "loss": 0.8992, "step": 17250 }, { "epoch": 19.49, "eval_loss": 0.19645148515701294, "eval_runtime": 447.1526, "eval_samples_per_second": 13.883, "eval_steps_per_second": 0.868, "eval_wer": 0.17034573598512803, "step": 17250 }, { "epoch": 19.55, "learning_rate": 1.949044585987261e-06, "loss": 0.8969, "step": 17300 }, { "epoch": 19.6, "learning_rate": 1.7101910828025476e-06, "loss": 0.872, "step": 17350 }, { "epoch": 19.66, "learning_rate": 1.4713375796178341e-06, "loss": 0.8984, "step": 17400 }, { "epoch": 19.72, "learning_rate": 1.2324840764331209e-06, "loss": 0.8913, "step": 17450 }, { "epoch": 19.77, "learning_rate": 9.936305732484076e-07, "loss": 0.8778, "step": 17500 }, { "epoch": 19.77, "eval_loss": 0.19571013748645782, "eval_runtime": 442.6975, "eval_samples_per_second": 14.023, "eval_steps_per_second": 0.876, "eval_wer": 0.16990667650658123, "step": 17500 }, { "epoch": 19.83, "learning_rate": 7.547770700636942e-07, "loss": 0.8687, "step": 17550 }, { "epoch": 19.89, "learning_rate": 5.159235668789809e-07, "loss": 0.8858, "step": 17600 }, { "epoch": 19.94, "learning_rate": 2.770700636942675e-07, "loss": 0.8854, "step": 17650 }, { "epoch": 20.0, "learning_rate": 3.821656050955413e-08, "loss": 0.8898, "step": 17700 }, { "epoch": 20.0, "step": 17700, "total_flos": 2.3221664293970497e+20, "train_loss": 1.2649466082470566, "train_runtime": 115228.5602, "train_samples_per_second": 9.837, "train_steps_per_second": 0.154 } ], "max_steps": 17700, "num_train_epochs": 20, "total_flos": 2.3221664293970497e+20, "trial_name": null, "trial_params": null }