{
  "best_metric": 0.15353840099691743,
  "best_model_checkpoint": "/scratch/lingjzhu_root/lingjzhu1/lingjzhu/g2p/byt5_small_finetuned_from_pretrained/checkpoint-5000",
  "epoch": 9.646302250803858,
  "global_step": 135000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0003,
      "loss": 1.3832,
      "step": 1000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002999616623572683,
      "loss": 0.2586,
      "step": 2000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00029984666902607135,
      "loss": 0.1824,
      "step": 3000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002996550787873857,
      "loss": 0.152,
      "step": 4000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002993869895761197,
      "loss": 0.1328,
      "step": 5000
    },
    {
      "epoch": 0.36,
      "eval_cer": 0.15353840099691743,
      "eval_loss": 0.16419939696788788,
      "eval_runtime": 90.9116,
      "eval_samples_per_second": 54.449,
      "eval_steps_per_second": 0.429,
      "eval_wer": 0.40606060606060607,
      "step": 5000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.000299042538431052,
      "loss": 0.1171,
      "step": 6000
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00029862190142478177,
      "loss": 0.1067,
      "step": 7000
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00029812529357372587,
      "loss": 0.0979,
      "step": 8000
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00029755296872820933,
      "loss": 0.0911,
      "step": 9000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0002969052194427048,
      "loss": 0.0857,
      "step": 10000
    },
    {
      "epoch": 0.71,
      "eval_cer": 0.1219912113858464,
      "eval_loss": 0.10491432994604111,
      "eval_runtime": 90.8023,
      "eval_samples_per_second": 54.514,
      "eval_steps_per_second": 0.43,
      "eval_wer": 0.3408080808080808,
      "step": 10000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.0002961823768262882,
      "loss": 0.082,
      "step": 11000
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0002953848103733858,
      "loss": 0.0774,
      "step": 12000
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00029451292777490066,
      "loss": 0.0745,
      "step": 13000
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0002935671747098137,
      "loss": 0.0708,
      "step": 14000
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.00029254803461736643,
      "loss": 0.0673,
      "step": 15000
    },
    {
      "epoch": 1.07,
      "eval_cer": 0.11350866837192015,
      "eval_loss": 0.08455779403448105,
      "eval_runtime": 91.1047,
      "eval_samples_per_second": 54.333,
      "eval_steps_per_second": 0.428,
      "eval_wer": 0.31757575757575757,
      "step": 15000
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.00029145602844994243,
      "loss": 0.065,
      "step": 16000
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.0002902917144067724,
      "loss": 0.0625,
      "step": 17000
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.00028905568764860047,
      "loss": 0.0611,
      "step": 18000
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.00028774857999345685,
      "loss": 0.0599,
      "step": 19000
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.0002863710595936922,
      "loss": 0.0585,
      "step": 20000
    },
    {
      "epoch": 1.43,
      "eval_cer": 0.10635972103801841,
      "eval_loss": 0.0741606280207634,
      "eval_runtime": 90.9345,
      "eval_samples_per_second": 54.435,
      "eval_steps_per_second": 0.429,
      "eval_wer": 0.29757575757575755,
      "step": 20000
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.0002849238305944389,
      "loss": 0.057,
      "step": 21000
    },
    {
      "epoch": 1.57,
      "learning_rate": 0.00028340763277367477,
      "loss": 0.0557,
      "step": 22000
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.0002818232411640713,
      "loss": 0.0548,
      "step": 23000
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.00028017146565682144,
      "loss": 0.0538,
      "step": 24000
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.00027845315058764886,
      "loss": 0.0526,
      "step": 25000
    },
    {
      "epoch": 1.79,
      "eval_cer": 0.10283990293172428,
      "eval_loss": 0.06704817712306976,
      "eval_runtime": 91.2941,
      "eval_samples_per_second": 54.22,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.2898989898989899,
      "step": 25000
    },
    {
      "epoch": 1.86,
      "learning_rate": 0.00027666917430520975,
      "loss": 0.0519,
      "step": 26000
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.00027482044872210895,
      "loss": 0.0506,
      "step": 27000
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.0002729079188487587,
      "loss": 0.0498,
      "step": 28000
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.00027093256231031885,
      "loss": 0.0468,
      "step": 29000
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.00026889538884696597,
      "loss": 0.0463,
      "step": 30000
    },
    {
      "epoch": 2.14,
      "eval_cer": 0.10310224962287663,
      "eval_loss": 0.0634964108467102,
      "eval_runtime": 91.3121,
      "eval_samples_per_second": 54.21,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.2896969696969697,
      "step": 30000
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.0002667974397977457,
      "loss": 0.0461,
      "step": 31000
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.0002646397875682729,
      "loss": 0.0454,
      "step": 32000
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.00026242353508255185,
      "loss": 0.0448,
      "step": 33000
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.0002601498152191957,
      "loss": 0.0446,
      "step": 34000
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.0002578197902323352,
      "loss": 0.0443,
      "step": 35000
    },
    {
      "epoch": 2.5,
      "eval_cer": 0.1002382982444634,
      "eval_loss": 0.06088022515177727,
      "eval_runtime": 91.0524,
      "eval_samples_per_second": 54.364,
      "eval_steps_per_second": 0.428,
      "eval_wer": 0.282020202020202,
      "step": 35000
    },
    {
      "epoch": 2.57,
      "learning_rate": 0.00025543465115751026,
      "loss": 0.0434,
      "step": 36000
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.0002529956172028505,
      "loss": 0.0431,
      "step": 37000
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.0002505039351258541,
      "loss": 0.0426,
      "step": 38000
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.0002479608785960846,
      "loss": 0.0417,
      "step": 39000
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.0002453677475441111,
      "loss": 0.0415,
      "step": 40000
    },
    {
      "epoch": 2.86,
      "eval_cer": 0.09859863142476115,
      "eval_loss": 0.05760909989476204,
      "eval_runtime": 91.2335,
      "eval_samples_per_second": 54.256,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.27494949494949494,
      "step": 40000
    },
    {
      "epoch": 2.93,
      "learning_rate": 0.00024272586749702474,
      "loss": 0.0411,
      "step": 41000
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.0002400365889008706,
      "loss": 0.0408,
      "step": 42000
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.00023730128643034235,
      "loss": 0.0377,
      "step": 43000
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.00023452135828609167,
      "loss": 0.0381,
      "step": 44000
    },
    {
      "epoch": 3.22,
      "learning_rate": 0.0002316982254800121,
      "loss": 0.0379,
      "step": 45000
    },
    {
      "epoch": 3.22,
      "eval_cer": 0.09984477820773485,
      "eval_loss": 0.056990817189216614,
      "eval_runtime": 91.2394,
      "eval_samples_per_second": 54.253,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.27595959595959596,
      "step": 45000
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.00022883333110886237,
      "loss": 0.0375,
      "step": 46000
    },
    {
      "epoch": 3.36,
      "learning_rate": 0.00022592813961660067,
      "loss": 0.0372,
      "step": 47000
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.00022298413604580696,
      "loss": 0.037,
      "step": 48000
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.00022000282527857588,
      "loss": 0.0367,
      "step": 49000
    },
    {
      "epoch": 3.57,
      "learning_rate": 0.0002169857312672683,
      "loss": 0.0366,
      "step": 50000
    },
    {
      "epoch": 3.57,
      "eval_cer": 0.09800835136966835,
      "eval_loss": 0.05443936586380005,
      "eval_runtime": 91.3792,
      "eval_samples_per_second": 54.17,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.27434343434343433,
      "step": 50000
    },
    {
      "epoch": 3.64,
      "learning_rate": 0.00021393439625551483,
      "loss": 0.0365,
      "step": 51000
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.00021085037998986924,
      "loss": 0.036,
      "step": 52000
    },
    {
      "epoch": 3.79,
      "learning_rate": 0.00020773525892251514,
      "loss": 0.0361,
      "step": 53000
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.00020459062540543316,
      "loss": 0.0358,
      "step": 54000
    },
    {
      "epoch": 3.93,
      "learning_rate": 0.00020141808687644067,
      "loss": 0.0355,
      "step": 55000
    },
    {
      "epoch": 3.93,
      "eval_cer": 0.09678406681095734,
      "eval_loss": 0.052644312381744385,
      "eval_runtime": 91.3207,
      "eval_samples_per_second": 54.205,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.27090909090909093,
      "step": 55000
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.00019821926503751995,
      "loss": 0.0351,
      "step": 56000
    },
    {
      "epoch": 4.07,
      "learning_rate": 0.00019499579502585537,
      "loss": 0.0327,
      "step": 57000
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.00019174932457800242,
      "loss": 0.0326,
      "step": 58000
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.0001884815131876167,
      "loss": 0.0327,
      "step": 59000
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.00018519403125717278,
      "loss": 0.0324,
      "step": 60000
    },
    {
      "epoch": 4.29,
      "eval_cer": 0.09630309787717803,
      "eval_loss": 0.05228239297866821,
      "eval_runtime": 91.3618,
      "eval_samples_per_second": 54.18,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.26686868686868687,
      "step": 60000
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.00018188855924410722,
      "loss": 0.0326,
      "step": 61000
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.00017856678680182127,
      "loss": 0.0323,
      "step": 62000
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.0001752304119159834,
      "loss": 0.0322,
      "step": 63000
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.00017188114003657205,
      "loss": 0.0323,
      "step": 64000
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.00016852068320610358,
      "loss": 0.0316,
      "step": 65000
    },
    {
      "epoch": 4.64,
      "eval_cer": 0.09446667103911152,
      "eval_loss": 0.050846464931964874,
      "eval_runtime": 91.2629,
      "eval_samples_per_second": 54.239,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.2638383838383838,
      "step": 65000
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.00016515075918448972,
      "loss": 0.0318,
      "step": 66000
    },
    {
      "epoch": 4.79,
      "learning_rate": 0.00016177309057097285,
      "loss": 0.0315,
      "step": 67000
    },
    {
      "epoch": 4.86,
      "learning_rate": 0.00015838940392358722,
      "loss": 0.0315,
      "step": 68000
    },
    {
      "epoch": 4.93,
      "learning_rate": 0.00015500142887659688,
      "loss": 0.0313,
      "step": 69000
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.00015161089725636095,
      "loss": 0.0313,
      "step": 70000
    },
    {
      "epoch": 5.0,
      "eval_cer": 0.09420432434795917,
      "eval_loss": 0.048644062131643295,
      "eval_runtime": 91.4526,
      "eval_samples_per_second": 54.126,
      "eval_steps_per_second": 0.426,
      "eval_wer": 0.26141414141414143,
      "step": 70000
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.00014821954219607845,
      "loss": 0.0289,
      "step": 71000
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.0001448290972498651,
      "loss": 0.029,
      "step": 72000
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.00014144129550661485,
      "loss": 0.0288,
      "step": 73000
    },
    {
      "epoch": 5.29,
      "learning_rate": 0.0001380578687040995,
      "loss": 0.0289,
      "step": 74000
    },
    {
      "epoch": 5.36,
      "learning_rate": 0.00013468054634375843,
      "loss": 0.0292,
      "step": 75000
    },
    {
      "epoch": 5.36,
      "eval_cer": 0.0939638398810695,
      "eval_loss": 0.049041230231523514,
      "eval_runtime": 91.4249,
      "eval_samples_per_second": 54.143,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.258989898989899,
      "step": 75000
    },
    {
      "epoch": 5.43,
      "learning_rate": 0.00013131105480663235,
      "loss": 0.0289,
      "step": 76000
    },
    {
      "epoch": 5.5,
      "learning_rate": 0.000127951116470891,
      "loss": 0.0288,
      "step": 77000
    },
    {
      "epoch": 5.57,
      "learning_rate": 0.00012460244883140783,
      "loss": 0.0287,
      "step": 78000
    },
    {
      "epoch": 5.64,
      "learning_rate": 0.0001212667636218309,
      "loss": 0.0284,
      "step": 79000
    },
    {
      "epoch": 5.72,
      "learning_rate": 0.00011794576593959775,
      "loss": 0.0283,
      "step": 80000
    },
    {
      "epoch": 5.72,
      "eval_cer": 0.0939419776568068,
      "eval_loss": 0.04810198023915291,
      "eval_runtime": 91.3828,
      "eval_samples_per_second": 54.168,
      "eval_steps_per_second": 0.427,
      "eval_wer": 0.2604040404040404,
      "step": 80000
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.00011464115337434394,
      "loss": 0.029,
      "step": 81000
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.00011135461514014796,
      "loss": 0.0289,
      "step": 82000
    },
    {
      "epoch": 5.93,
      "learning_rate": 0.00010808783121205837,
      "loss": 0.0285,
      "step": 83000
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.00010484247146734352,
      "loss": 0.029,
      "step": 84000
    },
    {
      "epoch": 6.07,
      "learning_rate": 0.00010162019483190237,
      "loss": 0.0282,
      "step": 85000
    },
    {
      "epoch": 6.07,
      "eval_cer": 0.09354845762007827,
      "eval_loss": 0.048511628061532974,
      "eval_runtime": 93.8707,
      "eval_samples_per_second": 52.732,
      "eval_steps_per_second": 0.415,
      "eval_wer": 0.25696969696969696,
      "step": 85000
    },
    {
      "epoch": 6.15,
      "learning_rate": 9.842264843227404e-05,
      "loss": 0.0282,
      "step": 86000
    },
    {
      "epoch": 6.22,
      "learning_rate": 9.52514667536784e-05,
      "loss": 0.0281,
      "step": 87000
    },
    {
      "epoch": 6.29,
      "learning_rate": 9.210827080451842e-05,
      "loss": 0.0281,
      "step": 88000
    },
    {
      "epoch": 6.36,
      "learning_rate": 8.899466728777203e-05,
      "loss": 0.0279,
      "step": 89000
    },
    {
      "epoch": 6.43,
      "learning_rate": 8.591224777969557e-05,
      "loss": 0.028,
      "step": 90000
    },
    {
      "epoch": 6.43,
      "eval_cer": 0.09381080431123062,
      "eval_loss": 0.047929223626852036,
      "eval_runtime": 93.5944,
      "eval_samples_per_second": 52.888,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.2608080808080808,
      "step": 90000
    },
    {
      "epoch": 6.5,
      "learning_rate": 8.286258791626041e-05,
      "loss": 0.028,
      "step": 91000
    },
    {
      "epoch": 6.57,
      "learning_rate": 7.984724658773716e-05,
      "loss": 0.0276,
      "step": 92000
    },
    {
      "epoch": 6.65,
      "learning_rate": 7.686776514184009e-05,
      "loss": 0.0275,
      "step": 93000
    },
    {
      "epoch": 6.72,
      "learning_rate": 7.392566659583846e-05,
      "loss": 0.0274,
      "step": 94000
    },
    {
      "epoch": 6.79,
      "learning_rate": 7.102245485803813e-05,
      "loss": 0.0276,
      "step": 95000
    },
    {
      "epoch": 6.79,
      "eval_cer": 0.09346100872302748,
      "eval_loss": 0.04663492366671562,
      "eval_runtime": 93.7458,
      "eval_samples_per_second": 52.802,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.25737373737373737,
      "step": 95000
    },
    {
      "epoch": 6.86,
      "learning_rate": 6.81596139590308e-05,
      "loss": 0.0273,
      "step": 96000
    },
    {
      "epoch": 6.93,
      "learning_rate": 6.533860729310434e-05,
      "loss": 0.0275,
      "step": 97000
    },
    {
      "epoch": 7.0,
      "learning_rate": 6.256087687020127e-05,
      "loss": 0.0271,
      "step": 98000
    },
    {
      "epoch": 7.07,
      "learning_rate": 5.98278425788092e-05,
      "loss": 0.0256,
      "step": 99000
    },
    {
      "epoch": 7.15,
      "learning_rate": 5.71409014601578e-05,
      "loss": 0.0257,
      "step": 100000
    },
    {
      "epoch": 7.15,
      "eval_cer": 0.09276141754662119,
      "eval_loss": 0.04688685014843941,
      "eval_runtime": 93.8485,
      "eval_samples_per_second": 52.745,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.25696969696969696,
      "step": 100000
    },
    {
      "epoch": 7.22,
      "learning_rate": 5.4501426994095876e-05,
      "loss": 0.0261,
      "step": 101000
    },
    {
      "epoch": 7.29,
      "learning_rate": 5.191076839701103e-05,
      "loss": 0.0262,
      "step": 102000
    },
    {
      "epoch": 7.36,
      "learning_rate": 4.9370249932153075e-05,
      "loss": 0.0261,
      "step": 103000
    },
    {
      "epoch": 7.43,
      "learning_rate": 4.6881170232712164e-05,
      "loss": 0.0259,
      "step": 104000
    },
    {
      "epoch": 7.5,
      "learning_rate": 4.444480163799822e-05,
      "loss": 0.0261,
      "step": 105000
    },
    {
      "epoch": 7.5,
      "eval_cer": 0.09184320412758794,
      "eval_loss": 0.04592433199286461,
      "eval_runtime": 93.414,
      "eval_samples_per_second": 52.99,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.25515151515151513,
      "step": 105000
    },
    {
      "epoch": 7.57,
      "learning_rate": 4.2062389543061265e-05,
      "loss": 0.0257,
      "step": 106000
    },
    {
      "epoch": 7.65,
      "learning_rate": 3.9735151762084384e-05,
      "loss": 0.0258,
      "step": 107000
    },
    {
      "epoch": 7.72,
      "learning_rate": 3.746427790587557e-05,
      "loss": 0.0258,
      "step": 108000
    },
    {
      "epoch": 7.79,
      "learning_rate": 3.525092877377602e-05,
      "loss": 0.0258,
      "step": 109000
    },
    {
      "epoch": 7.86,
      "learning_rate": 3.309623576029597e-05,
      "loss": 0.0258,
      "step": 110000
    },
    {
      "epoch": 7.86,
      "eval_cer": 0.09195251524890143,
      "eval_loss": 0.04556591063737869,
      "eval_runtime": 93.7142,
      "eval_samples_per_second": 52.82,
      "eval_steps_per_second": 0.416,
      "eval_wer": 0.25333333333333335,
      "step": 110000
    },
    {
      "epoch": 7.93,
      "learning_rate": 3.1001300276781274e-05,
      "loss": 0.0258,
      "step": 111000
    },
    {
      "epoch": 8.0,
      "learning_rate": 2.8967193188406938e-05,
      "loss": 0.0255,
      "step": 112000
    },
    {
      "epoch": 8.07,
      "learning_rate": 2.699495426678389e-05,
      "loss": 0.0241,
      "step": 113000
    },
    {
      "epoch": 8.15,
      "learning_rate": 2.5085591658461056e-05,
      "loss": 0.0243,
      "step": 114000
    },
    {
      "epoch": 8.22,
      "learning_rate": 2.3240081369591984e-05,
      "loss": 0.0249,
      "step": 115000
    },
    {
      "epoch": 8.22,
      "eval_cer": 0.09116547517544435,
      "eval_loss": 0.04581727460026741,
      "eval_runtime": 93.453,
      "eval_samples_per_second": 52.968,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.25292929292929295,
      "step": 115000
    },
    {
      "epoch": 8.29,
      "learning_rate": 2.1459366767031522e-05,
      "loss": 0.0247,
      "step": 116000
    },
    {
      "epoch": 8.36,
      "learning_rate": 1.9744358096116225e-05,
      "loss": 0.0249,
      "step": 117000
    },
    {
      "epoch": 8.43,
      "learning_rate": 1.8095932015375496e-05,
      "loss": 0.0247,
      "step": 118000
    },
    {
      "epoch": 8.5,
      "learning_rate": 1.65149311484114e-05,
      "loss": 0.0246,
      "step": 119000
    },
    {
      "epoch": 8.57,
      "learning_rate": 1.500216365317587e-05,
      "loss": 0.025,
      "step": 120000
    },
    {
      "epoch": 8.57,
      "eval_cer": 0.09134037296954592,
      "eval_loss": 0.045206133276224136,
      "eval_runtime": 93.5201,
      "eval_samples_per_second": 52.93,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.25191919191919193,
      "step": 120000
    },
    {
      "epoch": 8.65,
      "learning_rate": 1.355840280886582e-05,
      "loss": 0.025,
      "step": 121000
    },
    {
      "epoch": 8.72,
      "learning_rate": 1.2184386620647097e-05,
      "loss": 0.0247,
      "step": 122000
    },
    {
      "epoch": 8.79,
      "learning_rate": 1.0880817442409478e-05,
      "loss": 0.0248,
      "step": 123000
    },
    {
      "epoch": 8.86,
      "learning_rate": 9.648361617745371e-06,
      "loss": 0.0248,
      "step": 124000
    },
    {
      "epoch": 8.93,
      "learning_rate": 8.487649139335962e-06,
      "loss": 0.0248,
      "step": 125000
    },
    {
      "epoch": 8.93,
      "eval_cer": 0.09169016855774907,
      "eval_loss": 0.04524415731430054,
      "eval_runtime": 93.2177,
      "eval_samples_per_second": 53.101,
      "eval_steps_per_second": 0.418,
      "eval_wer": 0.25292929292929295,
      "step": 125000
    },
    {
      "epoch": 9.0,
      "learning_rate": 7.399273326918692e-06,
      "loss": 0.0246,
      "step": 126000
    },
    {
      "epoch": 9.07,
      "learning_rate": 6.383790524001009e-06,
      "loss": 0.024,
      "step": 127000
    },
    {
      "epoch": 9.15,
      "learning_rate": 5.441719813474849e-06,
      "loss": 0.0241,
      "step": 128000
    },
    {
      "epoch": 9.22,
      "learning_rate": 4.57354275227797e-06,
      "loss": 0.0242,
      "step": 129000
    },
    {
      "epoch": 9.29,
      "learning_rate": 3.7797031252369767e-06,
      "loss": 0.0243,
      "step": 130000
    },
    {
      "epoch": 9.29,
      "eval_cer": 0.09138409741807131,
      "eval_loss": 0.0453341044485569,
      "eval_runtime": 93.6155,
      "eval_samples_per_second": 52.876,
      "eval_steps_per_second": 0.417,
      "eval_wer": 0.25212121212121213,
      "step": 130000
    },
    {
      "epoch": 9.36,
      "learning_rate": 3.0606067182186776e-06,
      "loss": 0.024,
      "step": 131000
    },
    {
      "epoch": 9.43,
      "learning_rate": 2.4166211107049584e-06,
      "loss": 0.0242,
      "step": 132000
    },
    {
      "epoch": 9.5,
      "learning_rate": 1.8480754878977489e-06,
      "loss": 0.0241,
      "step": 133000
    },
    {
      "epoch": 9.57,
      "learning_rate": 1.3552604724498928e-06,
      "loss": 0.0241,
      "step": 134000
    },
    {
      "epoch": 9.65,
      "learning_rate": 9.384279759080127e-07,
      "loss": 0.024,
      "step": 135000
    },
    {
      "epoch": 9.65,
      "eval_cer": 0.09129664852102053,
      "eval_loss": 0.04531262069940567,
      "eval_runtime": 93.4097,
      "eval_samples_per_second": 52.992,
      "eval_steps_per_second": 0.418,
      "eval_wer": 0.25212121212121213,
      "step": 135000
    }
  ],
  "max_steps": 139950,
  "num_train_epochs": 10,
  "total_flos": 4.19385405764928e+18,
  "trial_name": null,
  "trial_params": null
}