g2p_multilingual_byT5_small / trainer_state.json
charsiu's picture
Upload trainer_state.json
834df67
{
"best_metric": 0.15353840099691743,
"best_model_checkpoint": "/scratch/lingjzhu_root/lingjzhu1/lingjzhu/g2p/byt5_small_finetuned_from_pretrained/checkpoint-5000",
"epoch": 9.646302250803858,
"global_step": 135000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 0.0003,
"loss": 1.3832,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 0.0002999616623572683,
"loss": 0.2586,
"step": 2000
},
{
"epoch": 0.21,
"learning_rate": 0.00029984666902607135,
"loss": 0.1824,
"step": 3000
},
{
"epoch": 0.29,
"learning_rate": 0.0002996550787873857,
"loss": 0.152,
"step": 4000
},
{
"epoch": 0.36,
"learning_rate": 0.0002993869895761197,
"loss": 0.1328,
"step": 5000
},
{
"epoch": 0.36,
"eval_cer": 0.15353840099691743,
"eval_loss": 0.16419939696788788,
"eval_runtime": 90.9116,
"eval_samples_per_second": 54.449,
"eval_steps_per_second": 0.429,
"eval_wer": 0.40606060606060607,
"step": 5000
},
{
"epoch": 0.43,
"learning_rate": 0.000299042538431052,
"loss": 0.1171,
"step": 6000
},
{
"epoch": 0.5,
"learning_rate": 0.00029862190142478177,
"loss": 0.1067,
"step": 7000
},
{
"epoch": 0.57,
"learning_rate": 0.00029812529357372587,
"loss": 0.0979,
"step": 8000
},
{
"epoch": 0.64,
"learning_rate": 0.00029755296872820933,
"loss": 0.0911,
"step": 9000
},
{
"epoch": 0.71,
"learning_rate": 0.0002969052194427048,
"loss": 0.0857,
"step": 10000
},
{
"epoch": 0.71,
"eval_cer": 0.1219912113858464,
"eval_loss": 0.10491432994604111,
"eval_runtime": 90.8023,
"eval_samples_per_second": 54.514,
"eval_steps_per_second": 0.43,
"eval_wer": 0.3408080808080808,
"step": 10000
},
{
"epoch": 0.79,
"learning_rate": 0.0002961823768262882,
"loss": 0.082,
"step": 11000
},
{
"epoch": 0.86,
"learning_rate": 0.0002953848103733858,
"loss": 0.0774,
"step": 12000
},
{
"epoch": 0.93,
"learning_rate": 0.00029451292777490066,
"loss": 0.0745,
"step": 13000
},
{
"epoch": 1.0,
"learning_rate": 0.0002935671747098137,
"loss": 0.0708,
"step": 14000
},
{
"epoch": 1.07,
"learning_rate": 0.00029254803461736643,
"loss": 0.0673,
"step": 15000
},
{
"epoch": 1.07,
"eval_cer": 0.11350866837192015,
"eval_loss": 0.08455779403448105,
"eval_runtime": 91.1047,
"eval_samples_per_second": 54.333,
"eval_steps_per_second": 0.428,
"eval_wer": 0.31757575757575757,
"step": 15000
},
{
"epoch": 1.14,
"learning_rate": 0.00029145602844994243,
"loss": 0.065,
"step": 16000
},
{
"epoch": 1.21,
"learning_rate": 0.0002902917144067724,
"loss": 0.0625,
"step": 17000
},
{
"epoch": 1.29,
"learning_rate": 0.00028905568764860047,
"loss": 0.0611,
"step": 18000
},
{
"epoch": 1.36,
"learning_rate": 0.00028774857999345685,
"loss": 0.0599,
"step": 19000
},
{
"epoch": 1.43,
"learning_rate": 0.0002863710595936922,
"loss": 0.0585,
"step": 20000
},
{
"epoch": 1.43,
"eval_cer": 0.10635972103801841,
"eval_loss": 0.0741606280207634,
"eval_runtime": 90.9345,
"eval_samples_per_second": 54.435,
"eval_steps_per_second": 0.429,
"eval_wer": 0.29757575757575755,
"step": 20000
},
{
"epoch": 1.5,
"learning_rate": 0.0002849238305944389,
"loss": 0.057,
"step": 21000
},
{
"epoch": 1.57,
"learning_rate": 0.00028340763277367477,
"loss": 0.0557,
"step": 22000
},
{
"epoch": 1.64,
"learning_rate": 0.0002818232411640713,
"loss": 0.0548,
"step": 23000
},
{
"epoch": 1.71,
"learning_rate": 0.00028017146565682144,
"loss": 0.0538,
"step": 24000
},
{
"epoch": 1.79,
"learning_rate": 0.00027845315058764886,
"loss": 0.0526,
"step": 25000
},
{
"epoch": 1.79,
"eval_cer": 0.10283990293172428,
"eval_loss": 0.06704817712306976,
"eval_runtime": 91.2941,
"eval_samples_per_second": 54.22,
"eval_steps_per_second": 0.427,
"eval_wer": 0.2898989898989899,
"step": 25000
},
{
"epoch": 1.86,
"learning_rate": 0.00027666917430520975,
"loss": 0.0519,
"step": 26000
},
{
"epoch": 1.93,
"learning_rate": 0.00027482044872210895,
"loss": 0.0506,
"step": 27000
},
{
"epoch": 2.0,
"learning_rate": 0.0002729079188487587,
"loss": 0.0498,
"step": 28000
},
{
"epoch": 2.07,
"learning_rate": 0.00027093256231031885,
"loss": 0.0468,
"step": 29000
},
{
"epoch": 2.14,
"learning_rate": 0.00026889538884696597,
"loss": 0.0463,
"step": 30000
},
{
"epoch": 2.14,
"eval_cer": 0.10310224962287663,
"eval_loss": 0.0634964108467102,
"eval_runtime": 91.3121,
"eval_samples_per_second": 54.21,
"eval_steps_per_second": 0.427,
"eval_wer": 0.2896969696969697,
"step": 30000
},
{
"epoch": 2.22,
"learning_rate": 0.0002667974397977457,
"loss": 0.0461,
"step": 31000
},
{
"epoch": 2.29,
"learning_rate": 0.0002646397875682729,
"loss": 0.0454,
"step": 32000
},
{
"epoch": 2.36,
"learning_rate": 0.00026242353508255185,
"loss": 0.0448,
"step": 33000
},
{
"epoch": 2.43,
"learning_rate": 0.0002601498152191957,
"loss": 0.0446,
"step": 34000
},
{
"epoch": 2.5,
"learning_rate": 0.0002578197902323352,
"loss": 0.0443,
"step": 35000
},
{
"epoch": 2.5,
"eval_cer": 0.1002382982444634,
"eval_loss": 0.06088022515177727,
"eval_runtime": 91.0524,
"eval_samples_per_second": 54.364,
"eval_steps_per_second": 0.428,
"eval_wer": 0.282020202020202,
"step": 35000
},
{
"epoch": 2.57,
"learning_rate": 0.00025543465115751026,
"loss": 0.0434,
"step": 36000
},
{
"epoch": 2.64,
"learning_rate": 0.0002529956172028505,
"loss": 0.0431,
"step": 37000
},
{
"epoch": 2.72,
"learning_rate": 0.0002505039351258541,
"loss": 0.0426,
"step": 38000
},
{
"epoch": 2.79,
"learning_rate": 0.0002479608785960846,
"loss": 0.0417,
"step": 39000
},
{
"epoch": 2.86,
"learning_rate": 0.0002453677475441111,
"loss": 0.0415,
"step": 40000
},
{
"epoch": 2.86,
"eval_cer": 0.09859863142476115,
"eval_loss": 0.05760909989476204,
"eval_runtime": 91.2335,
"eval_samples_per_second": 54.256,
"eval_steps_per_second": 0.427,
"eval_wer": 0.27494949494949494,
"step": 40000
},
{
"epoch": 2.93,
"learning_rate": 0.00024272586749702474,
"loss": 0.0411,
"step": 41000
},
{
"epoch": 3.0,
"learning_rate": 0.0002400365889008706,
"loss": 0.0408,
"step": 42000
},
{
"epoch": 3.07,
"learning_rate": 0.00023730128643034235,
"loss": 0.0377,
"step": 43000
},
{
"epoch": 3.14,
"learning_rate": 0.00023452135828609167,
"loss": 0.0381,
"step": 44000
},
{
"epoch": 3.22,
"learning_rate": 0.0002316982254800121,
"loss": 0.0379,
"step": 45000
},
{
"epoch": 3.22,
"eval_cer": 0.09984477820773485,
"eval_loss": 0.056990817189216614,
"eval_runtime": 91.2394,
"eval_samples_per_second": 54.253,
"eval_steps_per_second": 0.427,
"eval_wer": 0.27595959595959596,
"step": 45000
},
{
"epoch": 3.29,
"learning_rate": 0.00022883333110886237,
"loss": 0.0375,
"step": 46000
},
{
"epoch": 3.36,
"learning_rate": 0.00022592813961660067,
"loss": 0.0372,
"step": 47000
},
{
"epoch": 3.43,
"learning_rate": 0.00022298413604580696,
"loss": 0.037,
"step": 48000
},
{
"epoch": 3.5,
"learning_rate": 0.00022000282527857588,
"loss": 0.0367,
"step": 49000
},
{
"epoch": 3.57,
"learning_rate": 0.0002169857312672683,
"loss": 0.0366,
"step": 50000
},
{
"epoch": 3.57,
"eval_cer": 0.09800835136966835,
"eval_loss": 0.05443936586380005,
"eval_runtime": 91.3792,
"eval_samples_per_second": 54.17,
"eval_steps_per_second": 0.427,
"eval_wer": 0.27434343434343433,
"step": 50000
},
{
"epoch": 3.64,
"learning_rate": 0.00021393439625551483,
"loss": 0.0365,
"step": 51000
},
{
"epoch": 3.72,
"learning_rate": 0.00021085037998986924,
"loss": 0.036,
"step": 52000
},
{
"epoch": 3.79,
"learning_rate": 0.00020773525892251514,
"loss": 0.0361,
"step": 53000
},
{
"epoch": 3.86,
"learning_rate": 0.00020459062540543316,
"loss": 0.0358,
"step": 54000
},
{
"epoch": 3.93,
"learning_rate": 0.00020141808687644067,
"loss": 0.0355,
"step": 55000
},
{
"epoch": 3.93,
"eval_cer": 0.09678406681095734,
"eval_loss": 0.052644312381744385,
"eval_runtime": 91.3207,
"eval_samples_per_second": 54.205,
"eval_steps_per_second": 0.427,
"eval_wer": 0.27090909090909093,
"step": 55000
},
{
"epoch": 4.0,
"learning_rate": 0.00019821926503751995,
"loss": 0.0351,
"step": 56000
},
{
"epoch": 4.07,
"learning_rate": 0.00019499579502585537,
"loss": 0.0327,
"step": 57000
},
{
"epoch": 4.14,
"learning_rate": 0.00019174932457800242,
"loss": 0.0326,
"step": 58000
},
{
"epoch": 4.22,
"learning_rate": 0.0001884815131876167,
"loss": 0.0327,
"step": 59000
},
{
"epoch": 4.29,
"learning_rate": 0.00018519403125717278,
"loss": 0.0324,
"step": 60000
},
{
"epoch": 4.29,
"eval_cer": 0.09630309787717803,
"eval_loss": 0.05228239297866821,
"eval_runtime": 91.3618,
"eval_samples_per_second": 54.18,
"eval_steps_per_second": 0.427,
"eval_wer": 0.26686868686868687,
"step": 60000
},
{
"epoch": 4.36,
"learning_rate": 0.00018188855924410722,
"loss": 0.0326,
"step": 61000
},
{
"epoch": 4.43,
"learning_rate": 0.00017856678680182127,
"loss": 0.0323,
"step": 62000
},
{
"epoch": 4.5,
"learning_rate": 0.0001752304119159834,
"loss": 0.0322,
"step": 63000
},
{
"epoch": 4.57,
"learning_rate": 0.00017188114003657205,
"loss": 0.0323,
"step": 64000
},
{
"epoch": 4.64,
"learning_rate": 0.00016852068320610358,
"loss": 0.0316,
"step": 65000
},
{
"epoch": 4.64,
"eval_cer": 0.09446667103911152,
"eval_loss": 0.050846464931964874,
"eval_runtime": 91.2629,
"eval_samples_per_second": 54.239,
"eval_steps_per_second": 0.427,
"eval_wer": 0.2638383838383838,
"step": 65000
},
{
"epoch": 4.72,
"learning_rate": 0.00016515075918448972,
"loss": 0.0318,
"step": 66000
},
{
"epoch": 4.79,
"learning_rate": 0.00016177309057097285,
"loss": 0.0315,
"step": 67000
},
{
"epoch": 4.86,
"learning_rate": 0.00015838940392358722,
"loss": 0.0315,
"step": 68000
},
{
"epoch": 4.93,
"learning_rate": 0.00015500142887659688,
"loss": 0.0313,
"step": 69000
},
{
"epoch": 5.0,
"learning_rate": 0.00015161089725636095,
"loss": 0.0313,
"step": 70000
},
{
"epoch": 5.0,
"eval_cer": 0.09420432434795917,
"eval_loss": 0.048644062131643295,
"eval_runtime": 91.4526,
"eval_samples_per_second": 54.126,
"eval_steps_per_second": 0.426,
"eval_wer": 0.26141414141414143,
"step": 70000
},
{
"epoch": 5.07,
"learning_rate": 0.00014821954219607845,
"loss": 0.0289,
"step": 71000
},
{
"epoch": 5.14,
"learning_rate": 0.0001448290972498651,
"loss": 0.029,
"step": 72000
},
{
"epoch": 5.22,
"learning_rate": 0.00014144129550661485,
"loss": 0.0288,
"step": 73000
},
{
"epoch": 5.29,
"learning_rate": 0.0001380578687040995,
"loss": 0.0289,
"step": 74000
},
{
"epoch": 5.36,
"learning_rate": 0.00013468054634375843,
"loss": 0.0292,
"step": 75000
},
{
"epoch": 5.36,
"eval_cer": 0.0939638398810695,
"eval_loss": 0.049041230231523514,
"eval_runtime": 91.4249,
"eval_samples_per_second": 54.143,
"eval_steps_per_second": 0.427,
"eval_wer": 0.258989898989899,
"step": 75000
},
{
"epoch": 5.43,
"learning_rate": 0.00013131105480663235,
"loss": 0.0289,
"step": 76000
},
{
"epoch": 5.5,
"learning_rate": 0.000127951116470891,
"loss": 0.0288,
"step": 77000
},
{
"epoch": 5.57,
"learning_rate": 0.00012460244883140783,
"loss": 0.0287,
"step": 78000
},
{
"epoch": 5.64,
"learning_rate": 0.0001212667636218309,
"loss": 0.0284,
"step": 79000
},
{
"epoch": 5.72,
"learning_rate": 0.00011794576593959775,
"loss": 0.0283,
"step": 80000
},
{
"epoch": 5.72,
"eval_cer": 0.0939419776568068,
"eval_loss": 0.04810198023915291,
"eval_runtime": 91.3828,
"eval_samples_per_second": 54.168,
"eval_steps_per_second": 0.427,
"eval_wer": 0.2604040404040404,
"step": 80000
},
{
"epoch": 5.79,
"learning_rate": 0.00011464115337434394,
"loss": 0.029,
"step": 81000
},
{
"epoch": 5.86,
"learning_rate": 0.00011135461514014796,
"loss": 0.0289,
"step": 82000
},
{
"epoch": 5.93,
"learning_rate": 0.00010808783121205837,
"loss": 0.0285,
"step": 83000
},
{
"epoch": 6.0,
"learning_rate": 0.00010484247146734352,
"loss": 0.029,
"step": 84000
},
{
"epoch": 6.07,
"learning_rate": 0.00010162019483190237,
"loss": 0.0282,
"step": 85000
},
{
"epoch": 6.07,
"eval_cer": 0.09354845762007827,
"eval_loss": 0.048511628061532974,
"eval_runtime": 93.8707,
"eval_samples_per_second": 52.732,
"eval_steps_per_second": 0.415,
"eval_wer": 0.25696969696969696,
"step": 85000
},
{
"epoch": 6.15,
"learning_rate": 9.842264843227404e-05,
"loss": 0.0282,
"step": 86000
},
{
"epoch": 6.22,
"learning_rate": 9.52514667536784e-05,
"loss": 0.0281,
"step": 87000
},
{
"epoch": 6.29,
"learning_rate": 9.210827080451842e-05,
"loss": 0.0281,
"step": 88000
},
{
"epoch": 6.36,
"learning_rate": 8.899466728777203e-05,
"loss": 0.0279,
"step": 89000
},
{
"epoch": 6.43,
"learning_rate": 8.591224777969557e-05,
"loss": 0.028,
"step": 90000
},
{
"epoch": 6.43,
"eval_cer": 0.09381080431123062,
"eval_loss": 0.047929223626852036,
"eval_runtime": 93.5944,
"eval_samples_per_second": 52.888,
"eval_steps_per_second": 0.417,
"eval_wer": 0.2608080808080808,
"step": 90000
},
{
"epoch": 6.5,
"learning_rate": 8.286258791626041e-05,
"loss": 0.028,
"step": 91000
},
{
"epoch": 6.57,
"learning_rate": 7.984724658773716e-05,
"loss": 0.0276,
"step": 92000
},
{
"epoch": 6.65,
"learning_rate": 7.686776514184009e-05,
"loss": 0.0275,
"step": 93000
},
{
"epoch": 6.72,
"learning_rate": 7.392566659583846e-05,
"loss": 0.0274,
"step": 94000
},
{
"epoch": 6.79,
"learning_rate": 7.102245485803813e-05,
"loss": 0.0276,
"step": 95000
},
{
"epoch": 6.79,
"eval_cer": 0.09346100872302748,
"eval_loss": 0.04663492366671562,
"eval_runtime": 93.7458,
"eval_samples_per_second": 52.802,
"eval_steps_per_second": 0.416,
"eval_wer": 0.25737373737373737,
"step": 95000
},
{
"epoch": 6.86,
"learning_rate": 6.81596139590308e-05,
"loss": 0.0273,
"step": 96000
},
{
"epoch": 6.93,
"learning_rate": 6.533860729310434e-05,
"loss": 0.0275,
"step": 97000
},
{
"epoch": 7.0,
"learning_rate": 6.256087687020127e-05,
"loss": 0.0271,
"step": 98000
},
{
"epoch": 7.07,
"learning_rate": 5.98278425788092e-05,
"loss": 0.0256,
"step": 99000
},
{
"epoch": 7.15,
"learning_rate": 5.71409014601578e-05,
"loss": 0.0257,
"step": 100000
},
{
"epoch": 7.15,
"eval_cer": 0.09276141754662119,
"eval_loss": 0.04688685014843941,
"eval_runtime": 93.8485,
"eval_samples_per_second": 52.745,
"eval_steps_per_second": 0.416,
"eval_wer": 0.25696969696969696,
"step": 100000
},
{
"epoch": 7.22,
"learning_rate": 5.4501426994095876e-05,
"loss": 0.0261,
"step": 101000
},
{
"epoch": 7.29,
"learning_rate": 5.191076839701103e-05,
"loss": 0.0262,
"step": 102000
},
{
"epoch": 7.36,
"learning_rate": 4.9370249932153075e-05,
"loss": 0.0261,
"step": 103000
},
{
"epoch": 7.43,
"learning_rate": 4.6881170232712164e-05,
"loss": 0.0259,
"step": 104000
},
{
"epoch": 7.5,
"learning_rate": 4.444480163799822e-05,
"loss": 0.0261,
"step": 105000
},
{
"epoch": 7.5,
"eval_cer": 0.09184320412758794,
"eval_loss": 0.04592433199286461,
"eval_runtime": 93.414,
"eval_samples_per_second": 52.99,
"eval_steps_per_second": 0.417,
"eval_wer": 0.25515151515151513,
"step": 105000
},
{
"epoch": 7.57,
"learning_rate": 4.2062389543061265e-05,
"loss": 0.0257,
"step": 106000
},
{
"epoch": 7.65,
"learning_rate": 3.9735151762084384e-05,
"loss": 0.0258,
"step": 107000
},
{
"epoch": 7.72,
"learning_rate": 3.746427790587557e-05,
"loss": 0.0258,
"step": 108000
},
{
"epoch": 7.79,
"learning_rate": 3.525092877377602e-05,
"loss": 0.0258,
"step": 109000
},
{
"epoch": 7.86,
"learning_rate": 3.309623576029597e-05,
"loss": 0.0258,
"step": 110000
},
{
"epoch": 7.86,
"eval_cer": 0.09195251524890143,
"eval_loss": 0.04556591063737869,
"eval_runtime": 93.7142,
"eval_samples_per_second": 52.82,
"eval_steps_per_second": 0.416,
"eval_wer": 0.25333333333333335,
"step": 110000
},
{
"epoch": 7.93,
"learning_rate": 3.1001300276781274e-05,
"loss": 0.0258,
"step": 111000
},
{
"epoch": 8.0,
"learning_rate": 2.8967193188406938e-05,
"loss": 0.0255,
"step": 112000
},
{
"epoch": 8.07,
"learning_rate": 2.699495426678389e-05,
"loss": 0.0241,
"step": 113000
},
{
"epoch": 8.15,
"learning_rate": 2.5085591658461056e-05,
"loss": 0.0243,
"step": 114000
},
{
"epoch": 8.22,
"learning_rate": 2.3240081369591984e-05,
"loss": 0.0249,
"step": 115000
},
{
"epoch": 8.22,
"eval_cer": 0.09116547517544435,
"eval_loss": 0.04581727460026741,
"eval_runtime": 93.453,
"eval_samples_per_second": 52.968,
"eval_steps_per_second": 0.417,
"eval_wer": 0.25292929292929295,
"step": 115000
},
{
"epoch": 8.29,
"learning_rate": 2.1459366767031522e-05,
"loss": 0.0247,
"step": 116000
},
{
"epoch": 8.36,
"learning_rate": 1.9744358096116225e-05,
"loss": 0.0249,
"step": 117000
},
{
"epoch": 8.43,
"learning_rate": 1.8095932015375496e-05,
"loss": 0.0247,
"step": 118000
},
{
"epoch": 8.5,
"learning_rate": 1.65149311484114e-05,
"loss": 0.0246,
"step": 119000
},
{
"epoch": 8.57,
"learning_rate": 1.500216365317587e-05,
"loss": 0.025,
"step": 120000
},
{
"epoch": 8.57,
"eval_cer": 0.09134037296954592,
"eval_loss": 0.045206133276224136,
"eval_runtime": 93.5201,
"eval_samples_per_second": 52.93,
"eval_steps_per_second": 0.417,
"eval_wer": 0.25191919191919193,
"step": 120000
},
{
"epoch": 8.65,
"learning_rate": 1.355840280886582e-05,
"loss": 0.025,
"step": 121000
},
{
"epoch": 8.72,
"learning_rate": 1.2184386620647097e-05,
"loss": 0.0247,
"step": 122000
},
{
"epoch": 8.79,
"learning_rate": 1.0880817442409478e-05,
"loss": 0.0248,
"step": 123000
},
{
"epoch": 8.86,
"learning_rate": 9.648361617745371e-06,
"loss": 0.0248,
"step": 124000
},
{
"epoch": 8.93,
"learning_rate": 8.487649139335962e-06,
"loss": 0.0248,
"step": 125000
},
{
"epoch": 8.93,
"eval_cer": 0.09169016855774907,
"eval_loss": 0.04524415731430054,
"eval_runtime": 93.2177,
"eval_samples_per_second": 53.101,
"eval_steps_per_second": 0.418,
"eval_wer": 0.25292929292929295,
"step": 125000
},
{
"epoch": 9.0,
"learning_rate": 7.399273326918692e-06,
"loss": 0.0246,
"step": 126000
},
{
"epoch": 9.07,
"learning_rate": 6.383790524001009e-06,
"loss": 0.024,
"step": 127000
},
{
"epoch": 9.15,
"learning_rate": 5.441719813474849e-06,
"loss": 0.0241,
"step": 128000
},
{
"epoch": 9.22,
"learning_rate": 4.57354275227797e-06,
"loss": 0.0242,
"step": 129000
},
{
"epoch": 9.29,
"learning_rate": 3.7797031252369767e-06,
"loss": 0.0243,
"step": 130000
},
{
"epoch": 9.29,
"eval_cer": 0.09138409741807131,
"eval_loss": 0.0453341044485569,
"eval_runtime": 93.6155,
"eval_samples_per_second": 52.876,
"eval_steps_per_second": 0.417,
"eval_wer": 0.25212121212121213,
"step": 130000
},
{
"epoch": 9.36,
"learning_rate": 3.0606067182186776e-06,
"loss": 0.024,
"step": 131000
},
{
"epoch": 9.43,
"learning_rate": 2.4166211107049584e-06,
"loss": 0.0242,
"step": 132000
},
{
"epoch": 9.5,
"learning_rate": 1.8480754878977489e-06,
"loss": 0.0241,
"step": 133000
},
{
"epoch": 9.57,
"learning_rate": 1.3552604724498928e-06,
"loss": 0.0241,
"step": 134000
},
{
"epoch": 9.65,
"learning_rate": 9.384279759080127e-07,
"loss": 0.024,
"step": 135000
},
{
"epoch": 9.65,
"eval_cer": 0.09129664852102053,
"eval_loss": 0.04531262069940567,
"eval_runtime": 93.4097,
"eval_samples_per_second": 52.992,
"eval_steps_per_second": 0.418,
"eval_wer": 0.25212121212121213,
"step": 135000
}
],
"max_steps": 139950,
"num_train_epochs": 10,
"total_flos": 4.19385405764928e+18,
"trial_name": null,
"trial_params": null
}