{ "best_metric": 46.4318, "best_model_checkpoint": "outputs/modernisa-v2-byt5-base-lr0.0001/checkpoint-34000", "epoch": 5.0, "global_step": 57430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 9.912937489117186e-05, "loss": 0.3717, "step": 500 }, { "epoch": 0.09, "learning_rate": 9.825874978234372e-05, "loss": 0.2696, "step": 1000 }, { "epoch": 0.09, "eval_bleu": 27.8571, "eval_cer": 34.4149, "eval_gen_len": 18.5, "eval_loss": 0.3027326464653015, "eval_runtime": 51.4554, "eval_samples_per_second": 45.593, "eval_steps_per_second": 2.857, "eval_wer": 49.5134, "step": 1000 }, { "epoch": 0.13, "learning_rate": 9.73881246735156e-05, "loss": 0.2564, "step": 1500 }, { "epoch": 0.17, "learning_rate": 9.651749956468745e-05, "loss": 0.2518, "step": 2000 }, { "epoch": 0.17, "eval_bleu": 29.2213, "eval_cer": 34.6336, "eval_gen_len": 18.5371, "eval_loss": 0.28565752506256104, "eval_runtime": 53.6563, "eval_samples_per_second": 43.723, "eval_steps_per_second": 2.74, "eval_wer": 49.1981, "step": 2000 }, { "epoch": 0.22, "learning_rate": 9.564687445585931e-05, "loss": 0.2387, "step": 2500 }, { "epoch": 0.26, "learning_rate": 9.477624934703117e-05, "loss": 0.2343, "step": 3000 }, { "epoch": 0.26, "eval_bleu": 29.5067, "eval_cer": 34.9795, "eval_gen_len": 18.5537, "eval_loss": 0.2730022966861725, "eval_runtime": 51.0673, "eval_samples_per_second": 45.939, "eval_steps_per_second": 2.879, "eval_wer": 49.117, "step": 3000 }, { "epoch": 0.3, "learning_rate": 9.390562423820304e-05, "loss": 0.2331, "step": 3500 }, { "epoch": 0.35, "learning_rate": 9.30349991293749e-05, "loss": 0.2292, "step": 4000 }, { "epoch": 0.35, "eval_bleu": 29.884, "eval_cer": 34.8015, "eval_gen_len": 18.5516, "eval_loss": 0.26898515224456787, "eval_runtime": 52.4703, "eval_samples_per_second": 44.711, "eval_steps_per_second": 2.802, "eval_wer": 48.7025, "step": 4000 }, { "epoch": 0.39, "learning_rate": 9.216437402054676e-05, "loss": 0.2243, "step": 4500 }, { "epoch": 0.44, "learning_rate": 9.129374891171862e-05, "loss": 0.2243, "step": 5000 }, { "epoch": 0.44, "eval_bleu": 29.9577, "eval_cer": 34.7218, "eval_gen_len": 18.5477, "eval_loss": 0.26465946435928345, "eval_runtime": 51.6529, "eval_samples_per_second": 45.419, "eval_steps_per_second": 2.846, "eval_wer": 48.8466, "step": 5000 }, { "epoch": 0.48, "learning_rate": 9.042312380289048e-05, "loss": 0.2185, "step": 5500 }, { "epoch": 0.52, "learning_rate": 8.955249869406234e-05, "loss": 0.2112, "step": 6000 }, { "epoch": 0.52, "eval_bleu": 30.3115, "eval_cer": 34.4895, "eval_gen_len": 18.5477, "eval_loss": 0.2636098265647888, "eval_runtime": 53.1222, "eval_samples_per_second": 44.162, "eval_steps_per_second": 2.767, "eval_wer": 48.3871, "step": 6000 }, { "epoch": 0.57, "learning_rate": 8.86818735852342e-05, "loss": 0.2165, "step": 6500 }, { "epoch": 0.61, "learning_rate": 8.781124847640607e-05, "loss": 0.2118, "step": 7000 }, { "epoch": 0.61, "eval_bleu": 30.6364, "eval_cer": 34.7455, "eval_gen_len": 18.5413, "eval_loss": 0.25554388761520386, "eval_runtime": 50.9916, "eval_samples_per_second": 46.008, "eval_steps_per_second": 2.883, "eval_wer": 48.3961, "step": 7000 }, { "epoch": 0.65, "learning_rate": 8.694062336757793e-05, "loss": 0.2105, "step": 7500 }, { "epoch": 0.7, "learning_rate": 8.606999825874978e-05, "loss": 0.205, "step": 8000 }, { "epoch": 0.7, "eval_bleu": 31.0881, "eval_cer": 34.0759, "eval_gen_len": 18.5269, "eval_loss": 0.2507636547088623, "eval_runtime": 53.5946, "eval_samples_per_second": 43.773, "eval_steps_per_second": 2.743, "eval_wer": 47.468, "step": 8000 }, { "epoch": 0.74, "learning_rate": 8.519937314992164e-05, "loss": 0.2035, "step": 8500 }, { "epoch": 0.78, "learning_rate": 8.432874804109351e-05, "loss": 0.2049, "step": 9000 }, { "epoch": 0.78, "eval_bleu": 31.1481, "eval_cer": 34.4133, "eval_gen_len": 18.5503, "eval_loss": 0.24714592099189758, "eval_runtime": 60.0036, "eval_samples_per_second": 39.098, "eval_steps_per_second": 2.45, "eval_wer": 47.5942, "step": 9000 }, { "epoch": 0.83, "learning_rate": 8.345812293226537e-05, "loss": 0.2074, "step": 9500 }, { "epoch": 0.87, "learning_rate": 8.258749782343723e-05, "loss": 0.2005, "step": 10000 }, { "epoch": 0.87, "eval_bleu": 30.9375, "eval_cer": 34.281, "eval_gen_len": 18.5405, "eval_loss": 0.24682185053825378, "eval_runtime": 51.6353, "eval_samples_per_second": 45.434, "eval_steps_per_second": 2.847, "eval_wer": 47.6392, "step": 10000 }, { "epoch": 0.91, "learning_rate": 8.171687271460909e-05, "loss": 0.2049, "step": 10500 }, { "epoch": 0.96, "learning_rate": 8.084624760578095e-05, "loss": 0.1999, "step": 11000 }, { "epoch": 0.96, "eval_bleu": 30.9692, "eval_cer": 34.4183, "eval_gen_len": 18.5405, "eval_loss": 0.24305607378482819, "eval_runtime": 53.15, "eval_samples_per_second": 44.139, "eval_steps_per_second": 2.766, "eval_wer": 47.7023, "step": 11000 }, { "epoch": 1.0, "learning_rate": 7.997562249695282e-05, "loss": 0.2037, "step": 11500 }, { "epoch": 1.04, "learning_rate": 7.910499738812468e-05, "loss": 0.161, "step": 12000 }, { "epoch": 1.04, "eval_bleu": 31.2337, "eval_cer": 34.1878, "eval_gen_len": 18.5298, "eval_loss": 0.24913541972637177, "eval_runtime": 51.1684, "eval_samples_per_second": 45.849, "eval_steps_per_second": 2.873, "eval_wer": 47.3238, "step": 12000 }, { "epoch": 1.09, "learning_rate": 7.823437227929654e-05, "loss": 0.1568, "step": 12500 }, { "epoch": 1.13, "learning_rate": 7.73637471704684e-05, "loss": 0.1601, "step": 13000 }, { "epoch": 1.13, "eval_bleu": 31.4422, "eval_cer": 34.1657, "eval_gen_len": 18.5371, "eval_loss": 0.24957244098186493, "eval_runtime": 52.0238, "eval_samples_per_second": 45.095, "eval_steps_per_second": 2.826, "eval_wer": 47.3689, "step": 13000 }, { "epoch": 1.18, "learning_rate": 7.649312206164027e-05, "loss": 0.1523, "step": 13500 }, { "epoch": 1.22, "learning_rate": 7.562249695281213e-05, "loss": 0.1606, "step": 14000 }, { "epoch": 1.22, "eval_bleu": 31.4582, "eval_cer": 34.2386, "eval_gen_len": 18.5405, "eval_loss": 0.24588151276111603, "eval_runtime": 51.9336, "eval_samples_per_second": 45.173, "eval_steps_per_second": 2.831, "eval_wer": 47.3329, "step": 14000 }, { "epoch": 1.26, "learning_rate": 7.475187184398399e-05, "loss": 0.158, "step": 14500 }, { "epoch": 1.31, "learning_rate": 7.388124673515585e-05, "loss": 0.1594, "step": 15000 }, { "epoch": 1.31, "eval_bleu": 31.386, "eval_cer": 34.2912, "eval_gen_len": 18.5375, "eval_loss": 0.24664774537086487, "eval_runtime": 51.7169, "eval_samples_per_second": 45.362, "eval_steps_per_second": 2.842, "eval_wer": 47.1166, "step": 15000 }, { "epoch": 1.35, "learning_rate": 7.30106216263277e-05, "loss": 0.1609, "step": 15500 }, { "epoch": 1.39, "learning_rate": 7.213999651749956e-05, "loss": 0.1617, "step": 16000 }, { "epoch": 1.39, "eval_bleu": 31.6546, "eval_cer": 34.0149, "eval_gen_len": 18.5294, "eval_loss": 0.2411554753780365, "eval_runtime": 52.5275, "eval_samples_per_second": 44.662, "eval_steps_per_second": 2.799, "eval_wer": 46.8373, "step": 16000 }, { "epoch": 1.44, "learning_rate": 7.126937140867142e-05, "loss": 0.1594, "step": 16500 }, { "epoch": 1.48, "learning_rate": 7.03987462998433e-05, "loss": 0.1582, "step": 17000 }, { "epoch": 1.48, "eval_bleu": 31.2924, "eval_cer": 34.2573, "eval_gen_len": 18.5503, "eval_loss": 0.24606570601463318, "eval_runtime": 50.8697, "eval_samples_per_second": 46.118, "eval_steps_per_second": 2.89, "eval_wer": 47.4139, "step": 17000 }, { "epoch": 1.52, "learning_rate": 6.952812119101515e-05, "loss": 0.1599, "step": 17500 }, { "epoch": 1.57, "learning_rate": 6.865749608218701e-05, "loss": 0.1572, "step": 18000 }, { "epoch": 1.57, "eval_bleu": 31.1484, "eval_cer": 34.3675, "eval_gen_len": 18.5499, "eval_loss": 0.24250736832618713, "eval_runtime": 52.5057, "eval_samples_per_second": 44.681, "eval_steps_per_second": 2.8, "eval_wer": 47.45, "step": 18000 }, { "epoch": 1.61, "learning_rate": 6.778687097335887e-05, "loss": 0.1574, "step": 18500 }, { "epoch": 1.65, "learning_rate": 6.691624586453074e-05, "loss": 0.1565, "step": 19000 }, { "epoch": 1.65, "eval_bleu": 31.6967, "eval_cer": 34.1047, "eval_gen_len": 18.5388, "eval_loss": 0.2424342930316925, "eval_runtime": 51.0272, "eval_samples_per_second": 45.975, "eval_steps_per_second": 2.881, "eval_wer": 46.9724, "step": 19000 }, { "epoch": 1.7, "learning_rate": 6.60456207557026e-05, "loss": 0.1582, "step": 19500 }, { "epoch": 1.74, "learning_rate": 6.517499564687446e-05, "loss": 0.1585, "step": 20000 }, { "epoch": 1.74, "eval_bleu": 31.9026, "eval_cer": 34.281, "eval_gen_len": 18.558, "eval_loss": 0.2381763756275177, "eval_runtime": 52.4669, "eval_samples_per_second": 44.714, "eval_steps_per_second": 2.802, "eval_wer": 47.0175, "step": 20000 }, { "epoch": 1.78, "learning_rate": 6.430437053804632e-05, "loss": 0.1559, "step": 20500 }, { "epoch": 1.83, "learning_rate": 6.343374542921819e-05, "loss": 0.1522, "step": 21000 }, { "epoch": 1.83, "eval_bleu": 32.1619, "eval_cer": 33.9369, "eval_gen_len": 18.5311, "eval_loss": 0.23654896020889282, "eval_runtime": 52.4567, "eval_samples_per_second": 44.723, "eval_steps_per_second": 2.802, "eval_wer": 46.5219, "step": 21000 }, { "epoch": 1.87, "learning_rate": 6.256312032039005e-05, "loss": 0.1578, "step": 21500 }, { "epoch": 1.92, "learning_rate": 6.169249521156191e-05, "loss": 0.156, "step": 22000 }, { "epoch": 1.92, "eval_bleu": 31.7762, "eval_cer": 33.9572, "eval_gen_len": 18.5401, "eval_loss": 0.2381468415260315, "eval_runtime": 56.1509, "eval_samples_per_second": 41.78, "eval_steps_per_second": 2.618, "eval_wer": 46.7922, "step": 22000 }, { "epoch": 1.96, "learning_rate": 6.0821870102733766e-05, "loss": 0.1567, "step": 22500 }, { "epoch": 2.0, "learning_rate": 5.995124499390563e-05, "loss": 0.1538, "step": 23000 }, { "epoch": 2.0, "eval_bleu": 31.8785, "eval_cer": 34.2319, "eval_gen_len": 18.5516, "eval_loss": 0.24016974866390228, "eval_runtime": 53.089, "eval_samples_per_second": 44.19, "eval_steps_per_second": 2.769, "eval_wer": 46.8012, "step": 23000 }, { "epoch": 2.05, "learning_rate": 5.908061988507749e-05, "loss": 0.1075, "step": 23500 }, { "epoch": 2.09, "learning_rate": 5.8209994776249355e-05, "loss": 0.1083, "step": 24000 }, { "epoch": 2.09, "eval_bleu": 31.9905, "eval_cer": 34.0098, "eval_gen_len": 18.5384, "eval_loss": 0.2653577923774719, "eval_runtime": 52.9572, "eval_samples_per_second": 44.3, "eval_steps_per_second": 2.776, "eval_wer": 46.603, "step": 24000 }, { "epoch": 2.13, "learning_rate": 5.7339369667421214e-05, "loss": 0.108, "step": 24500 }, { "epoch": 2.18, "learning_rate": 5.6468744558593066e-05, "loss": 0.1086, "step": 25000 }, { "epoch": 2.18, "eval_bleu": 31.6257, "eval_cer": 34.2607, "eval_gen_len": 18.5409, "eval_loss": 0.26182088255882263, "eval_runtime": 51.8801, "eval_samples_per_second": 45.22, "eval_steps_per_second": 2.833, "eval_wer": 46.9995, "step": 25000 }, { "epoch": 2.22, "learning_rate": 5.559811944976493e-05, "loss": 0.1085, "step": 25500 }, { "epoch": 2.26, "learning_rate": 5.472749434093679e-05, "loss": 0.1092, "step": 26000 }, { "epoch": 2.26, "eval_bleu": 31.4886, "eval_cer": 34.337, "eval_gen_len": 18.5422, "eval_loss": 0.2658332288265228, "eval_runtime": 51.9175, "eval_samples_per_second": 45.187, "eval_steps_per_second": 2.831, "eval_wer": 47.1436, "step": 26000 }, { "epoch": 2.31, "learning_rate": 5.3856869232108655e-05, "loss": 0.1106, "step": 26500 }, { "epoch": 2.35, "learning_rate": 5.2986244123280514e-05, "loss": 0.1086, "step": 27000 }, { "epoch": 2.35, "eval_bleu": 31.8448, "eval_cer": 34.1217, "eval_gen_len": 18.5375, "eval_loss": 0.26663142442703247, "eval_runtime": 51.6292, "eval_samples_per_second": 45.439, "eval_steps_per_second": 2.847, "eval_wer": 46.6751, "step": 27000 }, { "epoch": 2.39, "learning_rate": 5.211561901445238e-05, "loss": 0.1087, "step": 27500 }, { "epoch": 2.44, "learning_rate": 5.124499390562424e-05, "loss": 0.1098, "step": 28000 }, { "epoch": 2.44, "eval_bleu": 31.709, "eval_cer": 34.1946, "eval_gen_len": 18.5452, "eval_loss": 0.2659294009208679, "eval_runtime": 58.8373, "eval_samples_per_second": 39.873, "eval_steps_per_second": 2.498, "eval_wer": 46.8913, "step": 28000 }, { "epoch": 2.48, "learning_rate": 5.03743687967961e-05, "loss": 0.1106, "step": 28500 }, { "epoch": 2.52, "learning_rate": 4.950374368796796e-05, "loss": 0.1117, "step": 29000 }, { "epoch": 2.52, "eval_bleu": 31.8114, "eval_cer": 34.1708, "eval_gen_len": 18.5431, "eval_loss": 0.2648890018463135, "eval_runtime": 57.7914, "eval_samples_per_second": 40.594, "eval_steps_per_second": 2.544, "eval_wer": 46.8913, "step": 29000 }, { "epoch": 2.57, "learning_rate": 4.863311857913983e-05, "loss": 0.1087, "step": 29500 }, { "epoch": 2.61, "learning_rate": 4.7762493470311686e-05, "loss": 0.1094, "step": 30000 }, { "epoch": 2.61, "eval_bleu": 31.6955, "eval_cer": 34.1606, "eval_gen_len": 18.5375, "eval_loss": 0.26563677191734314, "eval_runtime": 53.0374, "eval_samples_per_second": 44.233, "eval_steps_per_second": 2.772, "eval_wer": 46.8643, "step": 30000 }, { "epoch": 2.66, "learning_rate": 4.689186836148355e-05, "loss": 0.1079, "step": 30500 }, { "epoch": 2.7, "learning_rate": 4.602124325265541e-05, "loss": 0.1077, "step": 31000 }, { "epoch": 2.7, "eval_bleu": 31.5495, "eval_cer": 34.0064, "eval_gen_len": 18.5448, "eval_loss": 0.2636907398700714, "eval_runtime": 53.9667, "eval_samples_per_second": 43.471, "eval_steps_per_second": 2.724, "eval_wer": 46.8823, "step": 31000 }, { "epoch": 2.74, "learning_rate": 4.5150618143827276e-05, "loss": 0.1103, "step": 31500 }, { "epoch": 2.79, "learning_rate": 4.427999303499913e-05, "loss": 0.1088, "step": 32000 }, { "epoch": 2.79, "eval_bleu": 32.0837, "eval_cer": 33.9504, "eval_gen_len": 18.5413, "eval_loss": 0.2668997645378113, "eval_runtime": 50.6471, "eval_samples_per_second": 46.321, "eval_steps_per_second": 2.902, "eval_wer": 46.612, "step": 32000 }, { "epoch": 2.83, "learning_rate": 4.340936792617099e-05, "loss": 0.1087, "step": 32500 }, { "epoch": 2.87, "learning_rate": 4.253874281734285e-05, "loss": 0.1087, "step": 33000 }, { "epoch": 2.87, "eval_bleu": 31.5549, "eval_cer": 34.2149, "eval_gen_len": 18.5286, "eval_loss": 0.264612078666687, "eval_runtime": 52.8215, "eval_samples_per_second": 44.414, "eval_steps_per_second": 2.783, "eval_wer": 47.0806, "step": 33000 }, { "epoch": 2.92, "learning_rate": 4.166811770851472e-05, "loss": 0.1091, "step": 33500 }, { "epoch": 2.96, "learning_rate": 4.0797492599686575e-05, "loss": 0.1077, "step": 34000 }, { "epoch": 2.96, "eval_bleu": 32.1129, "eval_cer": 33.9403, "eval_gen_len": 18.5452, "eval_loss": 0.26299673318862915, "eval_runtime": 53.065, "eval_samples_per_second": 44.21, "eval_steps_per_second": 2.77, "eval_wer": 46.4318, "step": 34000 }, { "epoch": 3.0, "learning_rate": 3.992686749085844e-05, "loss": 0.1037, "step": 34500 }, { "epoch": 3.05, "learning_rate": 3.90562423820303e-05, "loss": 0.0652, "step": 35000 }, { "epoch": 3.05, "eval_bleu": 31.3861, "eval_cer": 34.1149, "eval_gen_len": 18.5396, "eval_loss": 0.33602526783943176, "eval_runtime": 53.5525, "eval_samples_per_second": 43.807, "eval_steps_per_second": 2.745, "eval_wer": 47.1977, "step": 35000 }, { "epoch": 3.09, "learning_rate": 3.8185617273202165e-05, "loss": 0.0648, "step": 35500 }, { "epoch": 3.13, "learning_rate": 3.7314992164374024e-05, "loss": 0.0662, "step": 36000 }, { "epoch": 3.13, "eval_bleu": 31.2372, "eval_cer": 34.203, "eval_gen_len": 18.552, "eval_loss": 0.3401012718677521, "eval_runtime": 51.9918, "eval_samples_per_second": 45.122, "eval_steps_per_second": 2.827, "eval_wer": 47.3869, "step": 36000 }, { "epoch": 3.18, "learning_rate": 3.644436705554589e-05, "loss": 0.0635, "step": 36500 }, { "epoch": 3.22, "learning_rate": 3.557374194671774e-05, "loss": 0.0666, "step": 37000 }, { "epoch": 3.22, "eval_bleu": 31.3462, "eval_cer": 34.1759, "eval_gen_len": 18.5469, "eval_loss": 0.33890092372894287, "eval_runtime": 51.8417, "eval_samples_per_second": 45.253, "eval_steps_per_second": 2.836, "eval_wer": 47.2968, "step": 37000 }, { "epoch": 3.26, "learning_rate": 3.4703116837889606e-05, "loss": 0.0646, "step": 37500 }, { "epoch": 3.31, "learning_rate": 3.3832491729061465e-05, "loss": 0.0648, "step": 38000 }, { "epoch": 3.31, "eval_bleu": 30.835, "eval_cer": 34.381, "eval_gen_len": 18.552, "eval_loss": 0.3339092433452606, "eval_runtime": 51.3621, "eval_samples_per_second": 45.676, "eval_steps_per_second": 2.862, "eval_wer": 47.6753, "step": 38000 }, { "epoch": 3.35, "learning_rate": 3.296186662023333e-05, "loss": 0.0648, "step": 38500 }, { "epoch": 3.4, "learning_rate": 3.209124151140519e-05, "loss": 0.0654, "step": 39000 }, { "epoch": 3.4, "eval_bleu": 31.0958, "eval_cer": 34.4692, "eval_gen_len": 18.5524, "eval_loss": 0.33954936265945435, "eval_runtime": 51.3892, "eval_samples_per_second": 45.652, "eval_steps_per_second": 2.861, "eval_wer": 47.7203, "step": 39000 }, { "epoch": 3.44, "learning_rate": 3.1220616402577054e-05, "loss": 0.0645, "step": 39500 }, { "epoch": 3.48, "learning_rate": 3.0349991293748913e-05, "loss": 0.0663, "step": 40000 }, { "epoch": 3.48, "eval_bleu": 31.126, "eval_cer": 34.4539, "eval_gen_len": 18.5499, "eval_loss": 0.3317714333534241, "eval_runtime": 53.3556, "eval_samples_per_second": 43.969, "eval_steps_per_second": 2.755, "eval_wer": 47.5942, "step": 40000 }, { "epoch": 3.53, "learning_rate": 2.9479366184920775e-05, "loss": 0.0657, "step": 40500 }, { "epoch": 3.57, "learning_rate": 2.8608741076092637e-05, "loss": 0.0648, "step": 41000 }, { "epoch": 3.57, "eval_bleu": 31.0295, "eval_cer": 34.3539, "eval_gen_len": 18.5477, "eval_loss": 0.33970215916633606, "eval_runtime": 52.8717, "eval_samples_per_second": 44.372, "eval_steps_per_second": 2.78, "eval_wer": 47.5852, "step": 41000 }, { "epoch": 3.61, "learning_rate": 2.77381159672645e-05, "loss": 0.0647, "step": 41500 }, { "epoch": 3.66, "learning_rate": 2.6867490858436354e-05, "loss": 0.0635, "step": 42000 }, { "epoch": 3.66, "eval_bleu": 31.1287, "eval_cer": 34.4285, "eval_gen_len": 18.5494, "eval_loss": 0.3414219617843628, "eval_runtime": 52.9358, "eval_samples_per_second": 44.318, "eval_steps_per_second": 2.777, "eval_wer": 47.5491, "step": 42000 }, { "epoch": 3.7, "learning_rate": 2.5996865749608216e-05, "loss": 0.0657, "step": 42500 }, { "epoch": 3.74, "learning_rate": 2.512624064078008e-05, "loss": 0.0656, "step": 43000 }, { "epoch": 3.74, "eval_bleu": 30.9225, "eval_cer": 34.4285, "eval_gen_len": 18.5563, "eval_loss": 0.3393559455871582, "eval_runtime": 51.5264, "eval_samples_per_second": 45.53, "eval_steps_per_second": 2.853, "eval_wer": 47.6392, "step": 43000 }, { "epoch": 3.79, "learning_rate": 2.425561553195194e-05, "loss": 0.0659, "step": 43500 }, { "epoch": 3.83, "learning_rate": 2.3384990423123803e-05, "loss": 0.0625, "step": 44000 }, { "epoch": 3.83, "eval_bleu": 31.2435, "eval_cer": 34.1674, "eval_gen_len": 18.5439, "eval_loss": 0.341974139213562, "eval_runtime": 51.6192, "eval_samples_per_second": 45.448, "eval_steps_per_second": 2.848, "eval_wer": 47.2968, "step": 44000 }, { "epoch": 3.87, "learning_rate": 2.2514365314295665e-05, "loss": 0.0642, "step": 44500 }, { "epoch": 3.92, "learning_rate": 2.1643740205467527e-05, "loss": 0.0636, "step": 45000 }, { "epoch": 3.92, "eval_bleu": 31.0688, "eval_cer": 34.3743, "eval_gen_len": 18.5439, "eval_loss": 0.3447582423686981, "eval_runtime": 51.0408, "eval_samples_per_second": 45.963, "eval_steps_per_second": 2.88, "eval_wer": 47.6843, "step": 45000 }, { "epoch": 3.96, "learning_rate": 2.0773115096639385e-05, "loss": 0.0644, "step": 45500 }, { "epoch": 4.0, "learning_rate": 1.9902489987811247e-05, "loss": 0.0586, "step": 46000 }, { "epoch": 4.0, "eval_bleu": 31.2353, "eval_cer": 34.2963, "eval_gen_len": 18.549, "eval_loss": 0.36747270822525024, "eval_runtime": 54.2264, "eval_samples_per_second": 43.263, "eval_steps_per_second": 2.711, "eval_wer": 47.441, "step": 46000 }, { "epoch": 4.05, "learning_rate": 1.903186487898311e-05, "loss": 0.0326, "step": 46500 }, { "epoch": 4.09, "learning_rate": 1.816123977015497e-05, "loss": 0.0298, "step": 47000 }, { "epoch": 4.09, "eval_bleu": 30.698, "eval_cer": 34.4319, "eval_gen_len": 18.5512, "eval_loss": 0.45656564831733704, "eval_runtime": 53.4442, "eval_samples_per_second": 43.896, "eval_steps_per_second": 2.751, "eval_wer": 47.8555, "step": 47000 }, { "epoch": 4.14, "learning_rate": 1.7290614661326833e-05, "loss": 0.0308, "step": 47500 }, { "epoch": 4.18, "learning_rate": 1.6419989552498692e-05, "loss": 0.0301, "step": 48000 }, { "epoch": 4.18, "eval_bleu": 30.7773, "eval_cer": 34.3861, "eval_gen_len": 18.5507, "eval_loss": 0.4724096655845642, "eval_runtime": 52.6433, "eval_samples_per_second": 44.564, "eval_steps_per_second": 2.792, "eval_wer": 47.8374, "step": 48000 }, { "epoch": 4.22, "learning_rate": 1.5549364443670554e-05, "loss": 0.0307, "step": 48500 }, { "epoch": 4.27, "learning_rate": 1.4678739334842418e-05, "loss": 0.0311, "step": 49000 }, { "epoch": 4.27, "eval_bleu": 31.0878, "eval_cer": 34.3861, "eval_gen_len": 18.5503, "eval_loss": 0.4639967978000641, "eval_runtime": 51.8688, "eval_samples_per_second": 45.229, "eval_steps_per_second": 2.834, "eval_wer": 47.6212, "step": 49000 }, { "epoch": 4.31, "learning_rate": 1.380811422601428e-05, "loss": 0.0313, "step": 49500 }, { "epoch": 4.35, "learning_rate": 1.2937489117186142e-05, "loss": 0.03, "step": 50000 }, { "epoch": 4.35, "eval_bleu": 30.8319, "eval_cer": 34.459, "eval_gen_len": 18.5529, "eval_loss": 0.46544739603996277, "eval_runtime": 56.982, "eval_samples_per_second": 41.171, "eval_steps_per_second": 2.58, "eval_wer": 47.8915, "step": 50000 }, { "epoch": 4.4, "learning_rate": 1.2066864008358002e-05, "loss": 0.0306, "step": 50500 }, { "epoch": 4.44, "learning_rate": 1.1196238899529864e-05, "loss": 0.0302, "step": 51000 }, { "epoch": 4.44, "eval_bleu": 30.9236, "eval_cer": 34.4997, "eval_gen_len": 18.552, "eval_loss": 0.46650850772857666, "eval_runtime": 50.604, "eval_samples_per_second": 46.36, "eval_steps_per_second": 2.905, "eval_wer": 47.9276, "step": 51000 }, { "epoch": 4.48, "learning_rate": 1.0325613790701725e-05, "loss": 0.0303, "step": 51500 }, { "epoch": 4.53, "learning_rate": 9.454988681873587e-06, "loss": 0.029, "step": 52000 }, { "epoch": 4.53, "eval_bleu": 30.8307, "eval_cer": 34.4997, "eval_gen_len": 18.5482, "eval_loss": 0.4756769835948944, "eval_runtime": 53.9866, "eval_samples_per_second": 43.455, "eval_steps_per_second": 2.723, "eval_wer": 47.9456, "step": 52000 }, { "epoch": 4.57, "learning_rate": 8.584363573045447e-06, "loss": 0.031, "step": 52500 }, { "epoch": 4.61, "learning_rate": 7.713738464217309e-06, "loss": 0.0301, "step": 53000 }, { "epoch": 4.61, "eval_bleu": 30.7983, "eval_cer": 34.5218, "eval_gen_len": 18.5473, "eval_loss": 0.4672394096851349, "eval_runtime": 52.9301, "eval_samples_per_second": 44.323, "eval_steps_per_second": 2.777, "eval_wer": 47.9456, "step": 53000 }, { "epoch": 4.66, "learning_rate": 6.843113355389169e-06, "loss": 0.0296, "step": 53500 }, { "epoch": 4.7, "learning_rate": 5.972488246561031e-06, "loss": 0.0294, "step": 54000 }, { "epoch": 4.7, "eval_bleu": 30.8924, "eval_cer": 34.4353, "eval_gen_len": 18.5529, "eval_loss": 0.4714747667312622, "eval_runtime": 53.7737, "eval_samples_per_second": 43.627, "eval_steps_per_second": 2.734, "eval_wer": 47.7564, "step": 54000 }, { "epoch": 4.74, "learning_rate": 5.1018631377328925e-06, "loss": 0.0292, "step": 54500 }, { "epoch": 4.79, "learning_rate": 4.2312380289047546e-06, "loss": 0.0288, "step": 55000 }, { "epoch": 4.79, "eval_bleu": 30.7372, "eval_cer": 34.4675, "eval_gen_len": 18.5524, "eval_loss": 0.4751755893230438, "eval_runtime": 51.756, "eval_samples_per_second": 45.328, "eval_steps_per_second": 2.84, "eval_wer": 47.7924, "step": 55000 }, { "epoch": 4.83, "learning_rate": 3.3606129200766153e-06, "loss": 0.0293, "step": 55500 }, { "epoch": 4.88, "learning_rate": 2.4899878112484765e-06, "loss": 0.0289, "step": 56000 }, { "epoch": 4.88, "eval_bleu": 30.8554, "eval_cer": 34.459, "eval_gen_len": 18.5516, "eval_loss": 0.4744004011154175, "eval_runtime": 51.9465, "eval_samples_per_second": 45.162, "eval_steps_per_second": 2.83, "eval_wer": 47.8555, "step": 56000 }, { "epoch": 4.92, "learning_rate": 1.619362702420338e-06, "loss": 0.0285, "step": 56500 }, { "epoch": 4.96, "learning_rate": 7.487375935921992e-07, "loss": 0.0288, "step": 57000 }, { "epoch": 4.96, "eval_bleu": 30.8745, "eval_cer": 34.4895, "eval_gen_len": 18.5499, "eval_loss": 0.4744308888912201, "eval_runtime": 51.4462, "eval_samples_per_second": 45.601, "eval_steps_per_second": 2.857, "eval_wer": 47.8194, "step": 57000 }, { "epoch": 5.0, "step": 57430, "total_flos": 1.2162866037733786e+17, "train_loss": 0.11780740710349318, "train_runtime": 33856.2876, "train_samples_per_second": 27.14, "train_steps_per_second": 1.696 } ], "max_steps": 57430, "num_train_epochs": 5, "total_flos": 1.2162866037733786e+17, "trial_name": null, "trial_params": null }