{ "best_metric": null, "best_model_checkpoint": null, "epoch": 198.62941847206386, "global_step": 87000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.42, "learning_rate": 5.988000000000001e-05, "loss": 1.7702, "step": 1500 }, { "epoch": 3.42, "eval_cer": 0.8902483010528606, "eval_loss": 1.379656434059143, "eval_runtime": 157.8178, "eval_samples_per_second": 23.134, "eval_steps_per_second": 1.451, "eval_wer": 0.9197613252728272, "step": 1500 }, { "epoch": 6.85, "learning_rate": 7.906822429906543e-05, "loss": 0.6423, "step": 3000 }, { "epoch": 6.85, "eval_cer": 0.8909467730023416, "eval_loss": 1.096530795097351, "eval_runtime": 155.6392, "eval_samples_per_second": 23.458, "eval_steps_per_second": 1.471, "eval_wer": 0.92135398557633, "step": 3000 }, { "epoch": 10.27, "learning_rate": 7.766728971962618e-05, "loss": 0.5752, "step": 4500 }, { "epoch": 10.27, "eval_cer": 0.8896414319820001, "eval_loss": 0.9504674077033997, "eval_runtime": 156.6885, "eval_samples_per_second": 23.301, "eval_steps_per_second": 1.461, "eval_wer": 0.9201763142251483, "step": 4500 }, { "epoch": 13.7, "learning_rate": 7.626728971962617e-05, "loss": 0.5136, "step": 6000 }, { "epoch": 13.7, "eval_cer": 0.8656071267039567, "eval_loss": 0.49109727144241333, "eval_runtime": 157.3097, "eval_samples_per_second": 23.209, "eval_steps_per_second": 1.456, "eval_wer": 0.8955349431913772, "step": 6000 }, { "epoch": 17.12, "learning_rate": 7.486542056074767e-05, "loss": 0.5015, "step": 7500 }, { "epoch": 17.12, "eval_cer": 0.818803780908815, "eval_loss": 0.4844619333744049, "eval_runtime": 157.3062, "eval_samples_per_second": 23.21, "eval_steps_per_second": 1.456, "eval_wer": 0.849381442142689, "step": 7500 }, { "epoch": 20.55, "learning_rate": 7.346355140186916e-05, "loss": 0.462, "step": 9000 }, { "epoch": 20.55, "eval_cer": 0.8027961778698895, "eval_loss": 0.4619074761867523, "eval_runtime": 157.0856, "eval_samples_per_second": 23.242, "eval_steps_per_second": 1.458, "eval_wer": 0.8343072488475645, "step": 9000 }, { "epoch": 23.97, "learning_rate": 7.206168224299066e-05, "loss": 0.444, "step": 10500 }, { "epoch": 23.97, "eval_cer": 0.7654622796521381, "eval_loss": 0.4942396581172943, "eval_runtime": 156.8664, "eval_samples_per_second": 23.275, "eval_steps_per_second": 1.46, "eval_wer": 0.7982929373366682, "step": 10500 }, { "epoch": 27.4, "learning_rate": 7.066074766355141e-05, "loss": 0.4318, "step": 12000 }, { "epoch": 27.4, "eval_cer": 0.7344947814985086, "eval_loss": 0.4564968943595886, "eval_runtime": 156.676, "eval_samples_per_second": 23.303, "eval_steps_per_second": 1.462, "eval_wer": 0.7668771520541953, "step": 12000 }, { "epoch": 30.82, "learning_rate": 6.92588785046729e-05, "loss": 0.3877, "step": 13500 }, { "epoch": 30.82, "eval_cer": 0.7305844836173977, "eval_loss": 0.44864970445632935, "eval_runtime": 156.7014, "eval_samples_per_second": 23.299, "eval_steps_per_second": 1.461, "eval_wer": 0.7626823988604627, "step": 13500 }, { "epoch": 34.25, "learning_rate": 6.78570093457944e-05, "loss": 0.3734, "step": 15000 }, { "epoch": 34.25, "eval_cer": 0.716420388510709, "eval_loss": 0.4351600408554077, "eval_runtime": 156.7512, "eval_samples_per_second": 23.292, "eval_steps_per_second": 1.461, "eval_wer": 0.7498626050090288, "step": 15000 }, { "epoch": 37.67, "learning_rate": 6.64551401869159e-05, "loss": 0.3595, "step": 16500 }, { "epoch": 37.67, "eval_cer": 0.6001820607212581, "eval_loss": 0.4632013738155365, "eval_runtime": 156.8539, "eval_samples_per_second": 23.276, "eval_steps_per_second": 1.46, "eval_wer": 0.6390493388216557, "step": 16500 }, { "epoch": 41.1, "learning_rate": 6.505327102803739e-05, "loss": 0.3466, "step": 18000 }, { "epoch": 41.1, "eval_cer": 0.592630548414984, "eval_loss": 0.4672980308532715, "eval_runtime": 155.7423, "eval_samples_per_second": 23.443, "eval_steps_per_second": 1.47, "eval_wer": 0.6305925369284088, "step": 18000 }, { "epoch": 44.52, "learning_rate": 6.365140186915889e-05, "loss": 0.3346, "step": 19500 }, { "epoch": 44.52, "eval_cer": 0.5607641970148912, "eval_loss": 0.48392337560653687, "eval_runtime": 156.8128, "eval_samples_per_second": 23.283, "eval_steps_per_second": 1.46, "eval_wer": 0.601790060453796, "step": 19500 }, { "epoch": 47.94, "learning_rate": 6.224953271028038e-05, "loss": 0.3224, "step": 21000 }, { "epoch": 47.94, "eval_cer": 0.5324932586006516, "eval_loss": 0.47312837839126587, "eval_runtime": 155.7519, "eval_samples_per_second": 23.441, "eval_steps_per_second": 1.47, "eval_wer": 0.5755448131988918, "step": 21000 }, { "epoch": 51.37, "learning_rate": 6.0849532710280374e-05, "loss": 0.3115, "step": 22500 }, { "epoch": 51.37, "eval_cer": 0.5504359724504343, "eval_loss": 0.46229076385498047, "eval_runtime": 157.1658, "eval_samples_per_second": 23.23, "eval_steps_per_second": 1.457, "eval_wer": 0.5916060072454827, "step": 22500 }, { "epoch": 54.79, "learning_rate": 5.9448598130841125e-05, "loss": 0.3004, "step": 24000 }, { "epoch": 54.79, "eval_cer": 0.4698426148041702, "eval_loss": 0.4939606785774231, "eval_runtime": 157.0777, "eval_samples_per_second": 23.243, "eval_steps_per_second": 1.458, "eval_wer": 0.5165603023811393, "step": 24000 }, { "epoch": 58.22, "learning_rate": 5.804672897196262e-05, "loss": 0.2911, "step": 25500 }, { "epoch": 58.22, "eval_cer": 0.4784017587752695, "eval_loss": 0.49099990725517273, "eval_runtime": 155.9127, "eval_samples_per_second": 23.417, "eval_steps_per_second": 1.469, "eval_wer": 0.5228972958422593, "step": 25500 }, { "epoch": 61.64, "learning_rate": 5.664579439252337e-05, "loss": 0.2816, "step": 27000 }, { "epoch": 61.64, "eval_cer": 0.48053152570319524, "eval_loss": 0.48187437653541565, "eval_runtime": 157.1059, "eval_samples_per_second": 23.239, "eval_steps_per_second": 1.458, "eval_wer": 0.524153478616853, "step": 27000 }, { "epoch": 65.07, "learning_rate": 5.5244859813084115e-05, "loss": 0.2753, "step": 28500 }, { "epoch": 65.07, "eval_cer": 0.43459840725494797, "eval_loss": 0.5083651542663574, "eval_runtime": 156.7879, "eval_samples_per_second": 23.286, "eval_steps_per_second": 1.461, "eval_wer": 0.48194798057403065, "step": 28500 }, { "epoch": 68.49, "learning_rate": 5.384299065420561e-05, "loss": 0.2628, "step": 30000 }, { "epoch": 68.49, "eval_cer": 0.441067860557518, "eval_loss": 0.5106444954872131, "eval_runtime": 161.9652, "eval_samples_per_second": 22.542, "eval_steps_per_second": 1.414, "eval_wer": 0.4881952466941083, "step": 30000 }, { "epoch": 71.92, "learning_rate": 5.2441121495327106e-05, "loss": 0.2534, "step": 31500 }, { "epoch": 71.92, "eval_cer": 0.40500495228062544, "eval_loss": 0.555060088634491, "eval_runtime": 162.1242, "eval_samples_per_second": 22.52, "eval_steps_per_second": 1.412, "eval_wer": 0.45390818649827835, "step": 31500 }, { "epoch": 75.34, "learning_rate": 5.104018691588786e-05, "loss": 0.2464, "step": 33000 }, { "epoch": 75.34, "eval_cer": 0.4066767048154488, "eval_loss": 0.5396202206611633, "eval_runtime": 161.0534, "eval_samples_per_second": 22.669, "eval_steps_per_second": 1.422, "eval_wer": 0.45515315335524176, "step": 33000 }, { "epoch": 78.77, "learning_rate": 4.96392523364486e-05, "loss": 0.2447, "step": 34500 }, { "epoch": 78.77, "eval_cer": 0.38928360823738883, "eval_loss": 0.5427628755569458, "eval_runtime": 161.5749, "eval_samples_per_second": 22.596, "eval_steps_per_second": 1.417, "eval_wer": 0.4384190042508328, "step": 34500 }, { "epoch": 82.19, "learning_rate": 4.82392523364486e-05, "loss": 0.2311, "step": 36000 }, { "epoch": 82.19, "eval_cer": 0.3919687176169511, "eval_loss": 0.537956714630127, "eval_runtime": 161.1887, "eval_samples_per_second": 22.65, "eval_steps_per_second": 1.421, "eval_wer": 0.43954059601386286, "step": 36000 }, { "epoch": 85.62, "learning_rate": 4.6837383177570096e-05, "loss": 0.2238, "step": 37500 }, { "epoch": 85.62, "eval_cer": 0.36581037059089583, "eval_loss": 0.5633525848388672, "eval_runtime": 160.6641, "eval_samples_per_second": 22.724, "eval_steps_per_second": 1.425, "eval_wer": 0.4180508978342063, "step": 37500 }, { "epoch": 89.04, "learning_rate": 4.543644859813085e-05, "loss": 0.2176, "step": 39000 }, { "epoch": 89.04, "eval_cer": 0.34971688985326366, "eval_loss": 0.5895515084266663, "eval_runtime": 161.4249, "eval_samples_per_second": 22.617, "eval_steps_per_second": 1.419, "eval_wer": 0.4021355107168093, "step": 39000 }, { "epoch": 92.47, "learning_rate": 4.4034579439252336e-05, "loss": 0.2106, "step": 40500 }, { "epoch": 92.47, "eval_cer": 0.354417262562476, "eval_loss": 0.5802760124206543, "eval_runtime": 161.093, "eval_samples_per_second": 22.664, "eval_steps_per_second": 1.422, "eval_wer": 0.40675646878049326, "step": 40500 }, { "epoch": 95.89, "learning_rate": 4.263271028037384e-05, "loss": 0.2058, "step": 42000 }, { "epoch": 95.89, "eval_cer": 0.34699742939421874, "eval_loss": 0.5864582657814026, "eval_runtime": 161.933, "eval_samples_per_second": 22.546, "eval_steps_per_second": 1.414, "eval_wer": 0.400744736930652, "step": 42000 }, { "epoch": 99.31, "learning_rate": 4.1230841121495333e-05, "loss": 0.1995, "step": 43500 }, { "epoch": 99.31, "eval_cer": 0.33578752712303983, "eval_loss": 0.5888575315475464, "eval_runtime": 161.5039, "eval_samples_per_second": 22.606, "eval_steps_per_second": 1.418, "eval_wer": 0.3900335355937146, "step": 43500 }, { "epoch": 102.74, "learning_rate": 3.982990654205608e-05, "loss": 0.1956, "step": 45000 }, { "epoch": 102.74, "eval_cer": 0.3373562264194152, "eval_loss": 0.5785849094390869, "eval_runtime": 159.8624, "eval_samples_per_second": 22.838, "eval_steps_per_second": 1.432, "eval_wer": 0.3900335355937146, "step": 45000 }, { "epoch": 106.16, "learning_rate": 3.842803738317757e-05, "loss": 0.191, "step": 46500 }, { "epoch": 106.16, "eval_cer": 0.33468256739968055, "eval_loss": 0.5981589555740356, "eval_runtime": 159.9077, "eval_samples_per_second": 22.832, "eval_steps_per_second": 1.432, "eval_wer": 0.38939422828878745, "step": 46500 }, { "epoch": 109.59, "learning_rate": 3.7027102803738324e-05, "loss": 0.1851, "step": 48000 }, { "epoch": 109.59, "eval_cer": 0.32407380901944843, "eval_loss": 0.603424072265625, "eval_runtime": 159.431, "eval_samples_per_second": 22.9, "eval_steps_per_second": 1.436, "eval_wer": 0.3804551419374376, "step": 48000 }, { "epoch": 113.01, "learning_rate": 3.562616822429907e-05, "loss": 0.1822, "step": 49500 }, { "epoch": 113.01, "eval_cer": 0.30877612828983153, "eval_loss": 0.6061017513275146, "eval_runtime": 158.5159, "eval_samples_per_second": 23.032, "eval_steps_per_second": 1.445, "eval_wer": 0.3677250754270461, "step": 49500 }, { "epoch": 116.44, "learning_rate": 3.422523364485982e-05, "loss": 0.183, "step": 51000 }, { "epoch": 116.44, "eval_cer": 0.3065089570439751, "eval_loss": 0.6075730919837952, "eval_runtime": 160.2872, "eval_samples_per_second": 22.778, "eval_steps_per_second": 1.429, "eval_wer": 0.36392287935037404, "step": 51000 }, { "epoch": 119.86, "learning_rate": 3.2823364485981314e-05, "loss": 0.172, "step": 52500 }, { "epoch": 119.86, "eval_cer": 0.2992551540932174, "eval_loss": 0.6227861642837524, "eval_runtime": 160.5993, "eval_samples_per_second": 22.734, "eval_steps_per_second": 1.426, "eval_wer": 0.3576083177245146, "step": 52500 }, { "epoch": 123.29, "learning_rate": 3.14214953271028e-05, "loss": 0.1677, "step": 54000 }, { "epoch": 123.29, "eval_cer": 0.300377289356318, "eval_loss": 0.630536675453186, "eval_runtime": 158.8853, "eval_samples_per_second": 22.979, "eval_steps_per_second": 1.441, "eval_wer": 0.35942529638062337, "step": 54000 }, { "epoch": 126.71, "learning_rate": 3.002056074766355e-05, "loss": 0.1641, "step": 55500 }, { "epoch": 126.71, "eval_cer": 0.2999479008627846, "eval_loss": 0.6270943284034729, "eval_runtime": 160.9314, "eval_samples_per_second": 22.687, "eval_steps_per_second": 1.423, "eval_wer": 0.3598515012505748, "step": 55500 }, { "epoch": 130.14, "learning_rate": 2.861869158878505e-05, "loss": 0.1596, "step": 57000 }, { "epoch": 130.14, "eval_cer": 0.3011845397241608, "eval_loss": 0.6379602551460266, "eval_runtime": 160.483, "eval_samples_per_second": 22.75, "eval_steps_per_second": 1.427, "eval_wer": 0.3607712064962595, "step": 57000 }, { "epoch": 133.56, "learning_rate": 2.7217757009345797e-05, "loss": 0.1561, "step": 58500 }, { "epoch": 133.56, "eval_cer": 0.2984937051646848, "eval_loss": 0.6372924447059631, "eval_runtime": 149.4703, "eval_samples_per_second": 24.426, "eval_steps_per_second": 1.532, "eval_wer": 0.3564530782085936, "step": 58500 }, { "epoch": 136.99, "learning_rate": 2.5815887850467292e-05, "loss": 0.1531, "step": 60000 }, { "epoch": 136.99, "eval_cer": 0.29037540004694645, "eval_loss": 0.633769154548645, "eval_runtime": 149.3294, "eval_samples_per_second": 24.449, "eval_steps_per_second": 1.534, "eval_wer": 0.35054228961742506, "step": 60000 }, { "epoch": 140.41, "learning_rate": 2.441495327102804e-05, "loss": 0.1501, "step": 61500 }, { "epoch": 140.41, "eval_cer": 0.280087251741886, "eval_loss": 0.6684455275535583, "eval_runtime": 148.2869, "eval_samples_per_second": 24.621, "eval_steps_per_second": 1.544, "eval_wer": 0.34238831750019627, "step": 61500 }, { "epoch": 143.83, "learning_rate": 2.3013084112149535e-05, "loss": 0.1461, "step": 63000 }, { "epoch": 143.83, "eval_cer": 0.2885776935540199, "eval_loss": 0.653037428855896, "eval_runtime": 148.0728, "eval_samples_per_second": 24.657, "eval_steps_per_second": 1.547, "eval_wer": 0.34928610684283135, "step": 63000 }, { "epoch": 147.26, "learning_rate": 2.1612149532710283e-05, "loss": 0.1438, "step": 64500 }, { "epoch": 147.26, "eval_cer": 0.28241739996679394, "eval_loss": 0.6683825254440308, "eval_runtime": 149.486, "eval_samples_per_second": 24.424, "eval_steps_per_second": 1.532, "eval_wer": 0.3449006830493837, "step": 64500 }, { "epoch": 150.68, "learning_rate": 2.0210280373831778e-05, "loss": 0.1399, "step": 66000 }, { "epoch": 150.68, "eval_cer": 0.2820109121929157, "eval_loss": 0.6699740290641785, "eval_runtime": 148.9217, "eval_samples_per_second": 24.516, "eval_steps_per_second": 1.538, "eval_wer": 0.34547269484852905, "step": 66000 }, { "epoch": 154.11, "learning_rate": 1.8810280373831778e-05, "loss": 0.1384, "step": 67500 }, { "epoch": 154.11, "eval_cer": 0.2813067150635209, "eval_loss": 0.6824755668640137, "eval_runtime": 149.3793, "eval_samples_per_second": 24.441, "eval_steps_per_second": 1.533, "eval_wer": 0.34288181787592953, "step": 67500 }, { "epoch": 157.53, "learning_rate": 1.740841121495327e-05, "loss": 0.1356, "step": 69000 }, { "epoch": 157.53, "eval_cer": 0.28230862154843217, "eval_loss": 0.6769393086433411, "eval_runtime": 147.7768, "eval_samples_per_second": 24.706, "eval_steps_per_second": 1.55, "eval_wer": 0.34468758061440796, "step": 69000 }, { "epoch": 160.96, "learning_rate": 1.600654205607477e-05, "loss": 0.1327, "step": 70500 }, { "epoch": 160.96, "eval_cer": 0.2698334545163082, "eval_loss": 0.6889060735702515, "eval_runtime": 148.0859, "eval_samples_per_second": 24.655, "eval_steps_per_second": 1.546, "eval_wer": 0.3343016408887493, "step": 70500 }, { "epoch": 164.38, "learning_rate": 1.4604672897196264e-05, "loss": 0.1301, "step": 72000 }, { "epoch": 164.38, "eval_cer": 0.2741445149913836, "eval_loss": 0.667121171951294, "eval_runtime": 147.8905, "eval_samples_per_second": 24.687, "eval_steps_per_second": 1.548, "eval_wer": 0.33721777947262754, "step": 72000 }, { "epoch": 167.81, "learning_rate": 1.3204672897196262e-05, "loss": 0.1274, "step": 73500 }, { "epoch": 167.81, "eval_cer": 0.2703658962482896, "eval_loss": 0.6766042113304138, "eval_runtime": 157.2135, "eval_samples_per_second": 23.223, "eval_steps_per_second": 1.457, "eval_wer": 0.33454839107661594, "step": 73500 }, { "epoch": 171.23, "learning_rate": 1.180373831775701e-05, "loss": 0.1254, "step": 75000 }, { "epoch": 171.23, "eval_cer": 0.26700521563890145, "eval_loss": 0.6990349888801575, "eval_runtime": 147.5703, "eval_samples_per_second": 24.741, "eval_steps_per_second": 1.552, "eval_wer": 0.33097051335254996, "step": 75000 }, { "epoch": 174.66, "learning_rate": 1.0401869158878507e-05, "loss": 0.1232, "step": 76500 }, { "epoch": 174.66, "eval_cer": 0.26890025019036223, "eval_loss": 0.6996525526046753, "eval_runtime": 148.6819, "eval_samples_per_second": 24.556, "eval_steps_per_second": 1.54, "eval_wer": 0.3331015377023071, "step": 76500 }, { "epoch": 178.08, "learning_rate": 9e-06, "loss": 0.1214, "step": 78000 }, { "epoch": 178.08, "eval_cer": 0.2654765926019225, "eval_loss": 0.7195149660110474, "eval_runtime": 147.4964, "eval_samples_per_second": 24.753, "eval_steps_per_second": 1.553, "eval_wer": 0.33113875211700444, "step": 78000 }, { "epoch": 181.51, "learning_rate": 7.600000000000001e-06, "loss": 0.1192, "step": 79500 }, { "epoch": 181.51, "eval_cer": 0.2665872775051956, "eval_loss": 0.7197592854499817, "eval_runtime": 146.5503, "eval_samples_per_second": 24.913, "eval_steps_per_second": 1.563, "eval_wer": 0.3315873888222165, "step": 79500 }, { "epoch": 184.93, "learning_rate": 6.198130841121495e-06, "loss": 0.117, "step": 81000 }, { "epoch": 184.93, "eval_cer": 0.26343270337270347, "eval_loss": 0.7252310514450073, "eval_runtime": 147.1458, "eval_samples_per_second": 24.812, "eval_steps_per_second": 1.556, "eval_wer": 0.3294339326371987, "step": 81000 }, { "epoch": 188.36, "learning_rate": 4.796261682242991e-06, "loss": 0.1155, "step": 82500 }, { "epoch": 188.36, "eval_cer": 0.26273995660313626, "eval_loss": 0.7319917678833008, "eval_runtime": 145.3615, "eval_samples_per_second": 25.117, "eval_steps_per_second": 1.575, "eval_wer": 0.3283684204623201, "step": 82500 }, { "epoch": 191.78, "learning_rate": 3.3953271028037387e-06, "loss": 0.1146, "step": 84000 }, { "epoch": 191.78, "eval_cer": 0.2623048429296891, "eval_loss": 0.7414848208427429, "eval_runtime": 146.2983, "eval_samples_per_second": 24.956, "eval_steps_per_second": 1.565, "eval_wer": 0.32897408001435635, "step": 84000 }, { "epoch": 195.21, "learning_rate": 1.9934579439252336e-06, "loss": 0.1131, "step": 85500 }, { "epoch": 195.21, "eval_cer": 0.26136018824391555, "eval_loss": 0.7424480319023132, "eval_runtime": 146.0724, "eval_samples_per_second": 24.994, "eval_steps_per_second": 1.568, "eval_wer": 0.328177749862605, "step": 85500 }, { "epoch": 198.63, "learning_rate": 5.925233644859814e-07, "loss": 0.1127, "step": 87000 }, { "epoch": 198.63, "eval_cer": 0.261194158026416, "eval_loss": 0.7422526478767395, "eval_runtime": 146.2208, "eval_samples_per_second": 24.969, "eval_steps_per_second": 1.566, "eval_wer": 0.3272580446169203, "step": 87000 } ], "max_steps": 87600, "num_train_epochs": 200, "total_flos": 9.618016300986444e+20, "trial_name": null, "trial_params": null }