|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 198.62941847206386, |
|
"global_step": 87000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 5.988000000000001e-05, |
|
"loss": 1.7702, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_cer": 0.8902483010528606, |
|
"eval_loss": 1.379656434059143, |
|
"eval_runtime": 157.8178, |
|
"eval_samples_per_second": 23.134, |
|
"eval_steps_per_second": 1.451, |
|
"eval_wer": 0.9197613252728272, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 7.906822429906543e-05, |
|
"loss": 0.6423, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_cer": 0.8909467730023416, |
|
"eval_loss": 1.096530795097351, |
|
"eval_runtime": 155.6392, |
|
"eval_samples_per_second": 23.458, |
|
"eval_steps_per_second": 1.471, |
|
"eval_wer": 0.92135398557633, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 7.766728971962618e-05, |
|
"loss": 0.5752, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"eval_cer": 0.8896414319820001, |
|
"eval_loss": 0.9504674077033997, |
|
"eval_runtime": 156.6885, |
|
"eval_samples_per_second": 23.301, |
|
"eval_steps_per_second": 1.461, |
|
"eval_wer": 0.9201763142251483, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 7.626728971962617e-05, |
|
"loss": 0.5136, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"eval_cer": 0.8656071267039567, |
|
"eval_loss": 0.49109727144241333, |
|
"eval_runtime": 157.3097, |
|
"eval_samples_per_second": 23.209, |
|
"eval_steps_per_second": 1.456, |
|
"eval_wer": 0.8955349431913772, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 7.486542056074767e-05, |
|
"loss": 0.5015, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"eval_cer": 0.818803780908815, |
|
"eval_loss": 0.4844619333744049, |
|
"eval_runtime": 157.3062, |
|
"eval_samples_per_second": 23.21, |
|
"eval_steps_per_second": 1.456, |
|
"eval_wer": 0.849381442142689, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"learning_rate": 7.346355140186916e-05, |
|
"loss": 0.462, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"eval_cer": 0.8027961778698895, |
|
"eval_loss": 0.4619074761867523, |
|
"eval_runtime": 157.0856, |
|
"eval_samples_per_second": 23.242, |
|
"eval_steps_per_second": 1.458, |
|
"eval_wer": 0.8343072488475645, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"learning_rate": 7.206168224299066e-05, |
|
"loss": 0.444, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"eval_cer": 0.7654622796521381, |
|
"eval_loss": 0.4942396581172943, |
|
"eval_runtime": 156.8664, |
|
"eval_samples_per_second": 23.275, |
|
"eval_steps_per_second": 1.46, |
|
"eval_wer": 0.7982929373366682, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"learning_rate": 7.066074766355141e-05, |
|
"loss": 0.4318, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"eval_cer": 0.7344947814985086, |
|
"eval_loss": 0.4564968943595886, |
|
"eval_runtime": 156.676, |
|
"eval_samples_per_second": 23.303, |
|
"eval_steps_per_second": 1.462, |
|
"eval_wer": 0.7668771520541953, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"learning_rate": 6.92588785046729e-05, |
|
"loss": 0.3877, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"eval_cer": 0.7305844836173977, |
|
"eval_loss": 0.44864970445632935, |
|
"eval_runtime": 156.7014, |
|
"eval_samples_per_second": 23.299, |
|
"eval_steps_per_second": 1.461, |
|
"eval_wer": 0.7626823988604627, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 34.25, |
|
"learning_rate": 6.78570093457944e-05, |
|
"loss": 0.3734, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 34.25, |
|
"eval_cer": 0.716420388510709, |
|
"eval_loss": 0.4351600408554077, |
|
"eval_runtime": 156.7512, |
|
"eval_samples_per_second": 23.292, |
|
"eval_steps_per_second": 1.461, |
|
"eval_wer": 0.7498626050090288, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 37.67, |
|
"learning_rate": 6.64551401869159e-05, |
|
"loss": 0.3595, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 37.67, |
|
"eval_cer": 0.6001820607212581, |
|
"eval_loss": 0.4632013738155365, |
|
"eval_runtime": 156.8539, |
|
"eval_samples_per_second": 23.276, |
|
"eval_steps_per_second": 1.46, |
|
"eval_wer": 0.6390493388216557, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 41.1, |
|
"learning_rate": 6.505327102803739e-05, |
|
"loss": 0.3466, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 41.1, |
|
"eval_cer": 0.592630548414984, |
|
"eval_loss": 0.4672980308532715, |
|
"eval_runtime": 155.7423, |
|
"eval_samples_per_second": 23.443, |
|
"eval_steps_per_second": 1.47, |
|
"eval_wer": 0.6305925369284088, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 44.52, |
|
"learning_rate": 6.365140186915889e-05, |
|
"loss": 0.3346, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 44.52, |
|
"eval_cer": 0.5607641970148912, |
|
"eval_loss": 0.48392337560653687, |
|
"eval_runtime": 156.8128, |
|
"eval_samples_per_second": 23.283, |
|
"eval_steps_per_second": 1.46, |
|
"eval_wer": 0.601790060453796, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 47.94, |
|
"learning_rate": 6.224953271028038e-05, |
|
"loss": 0.3224, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 47.94, |
|
"eval_cer": 0.5324932586006516, |
|
"eval_loss": 0.47312837839126587, |
|
"eval_runtime": 155.7519, |
|
"eval_samples_per_second": 23.441, |
|
"eval_steps_per_second": 1.47, |
|
"eval_wer": 0.5755448131988918, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 51.37, |
|
"learning_rate": 6.0849532710280374e-05, |
|
"loss": 0.3115, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 51.37, |
|
"eval_cer": 0.5504359724504343, |
|
"eval_loss": 0.46229076385498047, |
|
"eval_runtime": 157.1658, |
|
"eval_samples_per_second": 23.23, |
|
"eval_steps_per_second": 1.457, |
|
"eval_wer": 0.5916060072454827, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 54.79, |
|
"learning_rate": 5.9448598130841125e-05, |
|
"loss": 0.3004, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 54.79, |
|
"eval_cer": 0.4698426148041702, |
|
"eval_loss": 0.4939606785774231, |
|
"eval_runtime": 157.0777, |
|
"eval_samples_per_second": 23.243, |
|
"eval_steps_per_second": 1.458, |
|
"eval_wer": 0.5165603023811393, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 58.22, |
|
"learning_rate": 5.804672897196262e-05, |
|
"loss": 0.2911, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 58.22, |
|
"eval_cer": 0.4784017587752695, |
|
"eval_loss": 0.49099990725517273, |
|
"eval_runtime": 155.9127, |
|
"eval_samples_per_second": 23.417, |
|
"eval_steps_per_second": 1.469, |
|
"eval_wer": 0.5228972958422593, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 61.64, |
|
"learning_rate": 5.664579439252337e-05, |
|
"loss": 0.2816, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 61.64, |
|
"eval_cer": 0.48053152570319524, |
|
"eval_loss": 0.48187437653541565, |
|
"eval_runtime": 157.1059, |
|
"eval_samples_per_second": 23.239, |
|
"eval_steps_per_second": 1.458, |
|
"eval_wer": 0.524153478616853, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 65.07, |
|
"learning_rate": 5.5244859813084115e-05, |
|
"loss": 0.2753, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 65.07, |
|
"eval_cer": 0.43459840725494797, |
|
"eval_loss": 0.5083651542663574, |
|
"eval_runtime": 156.7879, |
|
"eval_samples_per_second": 23.286, |
|
"eval_steps_per_second": 1.461, |
|
"eval_wer": 0.48194798057403065, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 68.49, |
|
"learning_rate": 5.384299065420561e-05, |
|
"loss": 0.2628, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 68.49, |
|
"eval_cer": 0.441067860557518, |
|
"eval_loss": 0.5106444954872131, |
|
"eval_runtime": 161.9652, |
|
"eval_samples_per_second": 22.542, |
|
"eval_steps_per_second": 1.414, |
|
"eval_wer": 0.4881952466941083, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 71.92, |
|
"learning_rate": 5.2441121495327106e-05, |
|
"loss": 0.2534, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 71.92, |
|
"eval_cer": 0.40500495228062544, |
|
"eval_loss": 0.555060088634491, |
|
"eval_runtime": 162.1242, |
|
"eval_samples_per_second": 22.52, |
|
"eval_steps_per_second": 1.412, |
|
"eval_wer": 0.45390818649827835, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 75.34, |
|
"learning_rate": 5.104018691588786e-05, |
|
"loss": 0.2464, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 75.34, |
|
"eval_cer": 0.4066767048154488, |
|
"eval_loss": 0.5396202206611633, |
|
"eval_runtime": 161.0534, |
|
"eval_samples_per_second": 22.669, |
|
"eval_steps_per_second": 1.422, |
|
"eval_wer": 0.45515315335524176, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 78.77, |
|
"learning_rate": 4.96392523364486e-05, |
|
"loss": 0.2447, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 78.77, |
|
"eval_cer": 0.38928360823738883, |
|
"eval_loss": 0.5427628755569458, |
|
"eval_runtime": 161.5749, |
|
"eval_samples_per_second": 22.596, |
|
"eval_steps_per_second": 1.417, |
|
"eval_wer": 0.4384190042508328, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 82.19, |
|
"learning_rate": 4.82392523364486e-05, |
|
"loss": 0.2311, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 82.19, |
|
"eval_cer": 0.3919687176169511, |
|
"eval_loss": 0.537956714630127, |
|
"eval_runtime": 161.1887, |
|
"eval_samples_per_second": 22.65, |
|
"eval_steps_per_second": 1.421, |
|
"eval_wer": 0.43954059601386286, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 85.62, |
|
"learning_rate": 4.6837383177570096e-05, |
|
"loss": 0.2238, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 85.62, |
|
"eval_cer": 0.36581037059089583, |
|
"eval_loss": 0.5633525848388672, |
|
"eval_runtime": 160.6641, |
|
"eval_samples_per_second": 22.724, |
|
"eval_steps_per_second": 1.425, |
|
"eval_wer": 0.4180508978342063, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 89.04, |
|
"learning_rate": 4.543644859813085e-05, |
|
"loss": 0.2176, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 89.04, |
|
"eval_cer": 0.34971688985326366, |
|
"eval_loss": 0.5895515084266663, |
|
"eval_runtime": 161.4249, |
|
"eval_samples_per_second": 22.617, |
|
"eval_steps_per_second": 1.419, |
|
"eval_wer": 0.4021355107168093, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 92.47, |
|
"learning_rate": 4.4034579439252336e-05, |
|
"loss": 0.2106, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 92.47, |
|
"eval_cer": 0.354417262562476, |
|
"eval_loss": 0.5802760124206543, |
|
"eval_runtime": 161.093, |
|
"eval_samples_per_second": 22.664, |
|
"eval_steps_per_second": 1.422, |
|
"eval_wer": 0.40675646878049326, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 95.89, |
|
"learning_rate": 4.263271028037384e-05, |
|
"loss": 0.2058, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 95.89, |
|
"eval_cer": 0.34699742939421874, |
|
"eval_loss": 0.5864582657814026, |
|
"eval_runtime": 161.933, |
|
"eval_samples_per_second": 22.546, |
|
"eval_steps_per_second": 1.414, |
|
"eval_wer": 0.400744736930652, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 99.31, |
|
"learning_rate": 4.1230841121495333e-05, |
|
"loss": 0.1995, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 99.31, |
|
"eval_cer": 0.33578752712303983, |
|
"eval_loss": 0.5888575315475464, |
|
"eval_runtime": 161.5039, |
|
"eval_samples_per_second": 22.606, |
|
"eval_steps_per_second": 1.418, |
|
"eval_wer": 0.3900335355937146, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 102.74, |
|
"learning_rate": 3.982990654205608e-05, |
|
"loss": 0.1956, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 102.74, |
|
"eval_cer": 0.3373562264194152, |
|
"eval_loss": 0.5785849094390869, |
|
"eval_runtime": 159.8624, |
|
"eval_samples_per_second": 22.838, |
|
"eval_steps_per_second": 1.432, |
|
"eval_wer": 0.3900335355937146, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 106.16, |
|
"learning_rate": 3.842803738317757e-05, |
|
"loss": 0.191, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 106.16, |
|
"eval_cer": 0.33468256739968055, |
|
"eval_loss": 0.5981589555740356, |
|
"eval_runtime": 159.9077, |
|
"eval_samples_per_second": 22.832, |
|
"eval_steps_per_second": 1.432, |
|
"eval_wer": 0.38939422828878745, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 109.59, |
|
"learning_rate": 3.7027102803738324e-05, |
|
"loss": 0.1851, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 109.59, |
|
"eval_cer": 0.32407380901944843, |
|
"eval_loss": 0.603424072265625, |
|
"eval_runtime": 159.431, |
|
"eval_samples_per_second": 22.9, |
|
"eval_steps_per_second": 1.436, |
|
"eval_wer": 0.3804551419374376, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 113.01, |
|
"learning_rate": 3.562616822429907e-05, |
|
"loss": 0.1822, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 113.01, |
|
"eval_cer": 0.30877612828983153, |
|
"eval_loss": 0.6061017513275146, |
|
"eval_runtime": 158.5159, |
|
"eval_samples_per_second": 23.032, |
|
"eval_steps_per_second": 1.445, |
|
"eval_wer": 0.3677250754270461, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 116.44, |
|
"learning_rate": 3.422523364485982e-05, |
|
"loss": 0.183, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 116.44, |
|
"eval_cer": 0.3065089570439751, |
|
"eval_loss": 0.6075730919837952, |
|
"eval_runtime": 160.2872, |
|
"eval_samples_per_second": 22.778, |
|
"eval_steps_per_second": 1.429, |
|
"eval_wer": 0.36392287935037404, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 119.86, |
|
"learning_rate": 3.2823364485981314e-05, |
|
"loss": 0.172, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 119.86, |
|
"eval_cer": 0.2992551540932174, |
|
"eval_loss": 0.6227861642837524, |
|
"eval_runtime": 160.5993, |
|
"eval_samples_per_second": 22.734, |
|
"eval_steps_per_second": 1.426, |
|
"eval_wer": 0.3576083177245146, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 123.29, |
|
"learning_rate": 3.14214953271028e-05, |
|
"loss": 0.1677, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 123.29, |
|
"eval_cer": 0.300377289356318, |
|
"eval_loss": 0.630536675453186, |
|
"eval_runtime": 158.8853, |
|
"eval_samples_per_second": 22.979, |
|
"eval_steps_per_second": 1.441, |
|
"eval_wer": 0.35942529638062337, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 126.71, |
|
"learning_rate": 3.002056074766355e-05, |
|
"loss": 0.1641, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 126.71, |
|
"eval_cer": 0.2999479008627846, |
|
"eval_loss": 0.6270943284034729, |
|
"eval_runtime": 160.9314, |
|
"eval_samples_per_second": 22.687, |
|
"eval_steps_per_second": 1.423, |
|
"eval_wer": 0.3598515012505748, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 130.14, |
|
"learning_rate": 2.861869158878505e-05, |
|
"loss": 0.1596, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 130.14, |
|
"eval_cer": 0.3011845397241608, |
|
"eval_loss": 0.6379602551460266, |
|
"eval_runtime": 160.483, |
|
"eval_samples_per_second": 22.75, |
|
"eval_steps_per_second": 1.427, |
|
"eval_wer": 0.3607712064962595, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 133.56, |
|
"learning_rate": 2.7217757009345797e-05, |
|
"loss": 0.1561, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 133.56, |
|
"eval_cer": 0.2984937051646848, |
|
"eval_loss": 0.6372924447059631, |
|
"eval_runtime": 149.4703, |
|
"eval_samples_per_second": 24.426, |
|
"eval_steps_per_second": 1.532, |
|
"eval_wer": 0.3564530782085936, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 136.99, |
|
"learning_rate": 2.5815887850467292e-05, |
|
"loss": 0.1531, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 136.99, |
|
"eval_cer": 0.29037540004694645, |
|
"eval_loss": 0.633769154548645, |
|
"eval_runtime": 149.3294, |
|
"eval_samples_per_second": 24.449, |
|
"eval_steps_per_second": 1.534, |
|
"eval_wer": 0.35054228961742506, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 140.41, |
|
"learning_rate": 2.441495327102804e-05, |
|
"loss": 0.1501, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 140.41, |
|
"eval_cer": 0.280087251741886, |
|
"eval_loss": 0.6684455275535583, |
|
"eval_runtime": 148.2869, |
|
"eval_samples_per_second": 24.621, |
|
"eval_steps_per_second": 1.544, |
|
"eval_wer": 0.34238831750019627, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 143.83, |
|
"learning_rate": 2.3013084112149535e-05, |
|
"loss": 0.1461, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 143.83, |
|
"eval_cer": 0.2885776935540199, |
|
"eval_loss": 0.653037428855896, |
|
"eval_runtime": 148.0728, |
|
"eval_samples_per_second": 24.657, |
|
"eval_steps_per_second": 1.547, |
|
"eval_wer": 0.34928610684283135, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 147.26, |
|
"learning_rate": 2.1612149532710283e-05, |
|
"loss": 0.1438, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 147.26, |
|
"eval_cer": 0.28241739996679394, |
|
"eval_loss": 0.6683825254440308, |
|
"eval_runtime": 149.486, |
|
"eval_samples_per_second": 24.424, |
|
"eval_steps_per_second": 1.532, |
|
"eval_wer": 0.3449006830493837, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 150.68, |
|
"learning_rate": 2.0210280373831778e-05, |
|
"loss": 0.1399, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 150.68, |
|
"eval_cer": 0.2820109121929157, |
|
"eval_loss": 0.6699740290641785, |
|
"eval_runtime": 148.9217, |
|
"eval_samples_per_second": 24.516, |
|
"eval_steps_per_second": 1.538, |
|
"eval_wer": 0.34547269484852905, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 154.11, |
|
"learning_rate": 1.8810280373831778e-05, |
|
"loss": 0.1384, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 154.11, |
|
"eval_cer": 0.2813067150635209, |
|
"eval_loss": 0.6824755668640137, |
|
"eval_runtime": 149.3793, |
|
"eval_samples_per_second": 24.441, |
|
"eval_steps_per_second": 1.533, |
|
"eval_wer": 0.34288181787592953, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 157.53, |
|
"learning_rate": 1.740841121495327e-05, |
|
"loss": 0.1356, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 157.53, |
|
"eval_cer": 0.28230862154843217, |
|
"eval_loss": 0.6769393086433411, |
|
"eval_runtime": 147.7768, |
|
"eval_samples_per_second": 24.706, |
|
"eval_steps_per_second": 1.55, |
|
"eval_wer": 0.34468758061440796, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 160.96, |
|
"learning_rate": 1.600654205607477e-05, |
|
"loss": 0.1327, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 160.96, |
|
"eval_cer": 0.2698334545163082, |
|
"eval_loss": 0.6889060735702515, |
|
"eval_runtime": 148.0859, |
|
"eval_samples_per_second": 24.655, |
|
"eval_steps_per_second": 1.546, |
|
"eval_wer": 0.3343016408887493, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 164.38, |
|
"learning_rate": 1.4604672897196264e-05, |
|
"loss": 0.1301, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 164.38, |
|
"eval_cer": 0.2741445149913836, |
|
"eval_loss": 0.667121171951294, |
|
"eval_runtime": 147.8905, |
|
"eval_samples_per_second": 24.687, |
|
"eval_steps_per_second": 1.548, |
|
"eval_wer": 0.33721777947262754, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 167.81, |
|
"learning_rate": 1.3204672897196262e-05, |
|
"loss": 0.1274, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 167.81, |
|
"eval_cer": 0.2703658962482896, |
|
"eval_loss": 0.6766042113304138, |
|
"eval_runtime": 157.2135, |
|
"eval_samples_per_second": 23.223, |
|
"eval_steps_per_second": 1.457, |
|
"eval_wer": 0.33454839107661594, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 171.23, |
|
"learning_rate": 1.180373831775701e-05, |
|
"loss": 0.1254, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 171.23, |
|
"eval_cer": 0.26700521563890145, |
|
"eval_loss": 0.6990349888801575, |
|
"eval_runtime": 147.5703, |
|
"eval_samples_per_second": 24.741, |
|
"eval_steps_per_second": 1.552, |
|
"eval_wer": 0.33097051335254996, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 174.66, |
|
"learning_rate": 1.0401869158878507e-05, |
|
"loss": 0.1232, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 174.66, |
|
"eval_cer": 0.26890025019036223, |
|
"eval_loss": 0.6996525526046753, |
|
"eval_runtime": 148.6819, |
|
"eval_samples_per_second": 24.556, |
|
"eval_steps_per_second": 1.54, |
|
"eval_wer": 0.3331015377023071, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 178.08, |
|
"learning_rate": 9e-06, |
|
"loss": 0.1214, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 178.08, |
|
"eval_cer": 0.2654765926019225, |
|
"eval_loss": 0.7195149660110474, |
|
"eval_runtime": 147.4964, |
|
"eval_samples_per_second": 24.753, |
|
"eval_steps_per_second": 1.553, |
|
"eval_wer": 0.33113875211700444, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 181.51, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.1192, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 181.51, |
|
"eval_cer": 0.2665872775051956, |
|
"eval_loss": 0.7197592854499817, |
|
"eval_runtime": 146.5503, |
|
"eval_samples_per_second": 24.913, |
|
"eval_steps_per_second": 1.563, |
|
"eval_wer": 0.3315873888222165, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 184.93, |
|
"learning_rate": 6.198130841121495e-06, |
|
"loss": 0.117, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 184.93, |
|
"eval_cer": 0.26343270337270347, |
|
"eval_loss": 0.7252310514450073, |
|
"eval_runtime": 147.1458, |
|
"eval_samples_per_second": 24.812, |
|
"eval_steps_per_second": 1.556, |
|
"eval_wer": 0.3294339326371987, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 188.36, |
|
"learning_rate": 4.796261682242991e-06, |
|
"loss": 0.1155, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 188.36, |
|
"eval_cer": 0.26273995660313626, |
|
"eval_loss": 0.7319917678833008, |
|
"eval_runtime": 145.3615, |
|
"eval_samples_per_second": 25.117, |
|
"eval_steps_per_second": 1.575, |
|
"eval_wer": 0.3283684204623201, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 191.78, |
|
"learning_rate": 3.3953271028037387e-06, |
|
"loss": 0.1146, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 191.78, |
|
"eval_cer": 0.2623048429296891, |
|
"eval_loss": 0.7414848208427429, |
|
"eval_runtime": 146.2983, |
|
"eval_samples_per_second": 24.956, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wer": 0.32897408001435635, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 195.21, |
|
"learning_rate": 1.9934579439252336e-06, |
|
"loss": 0.1131, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 195.21, |
|
"eval_cer": 0.26136018824391555, |
|
"eval_loss": 0.7424480319023132, |
|
"eval_runtime": 146.0724, |
|
"eval_samples_per_second": 24.994, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wer": 0.328177749862605, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 198.63, |
|
"learning_rate": 5.925233644859814e-07, |
|
"loss": 0.1127, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 198.63, |
|
"eval_cer": 0.261194158026416, |
|
"eval_loss": 0.7422526478767395, |
|
"eval_runtime": 146.2208, |
|
"eval_samples_per_second": 24.969, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wer": 0.3272580446169203, |
|
"step": 87000 |
|
} |
|
], |
|
"max_steps": 87600, |
|
"num_train_epochs": 200, |
|
"total_flos": 9.618016300986444e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|