{ "best_metric": 0.2446382894995737, "best_model_checkpoint": "/scratch/lingjzhu_root/lingjzhu1/lingjzhu/g2p/byt5_8_layers_baseline/checkpoint-5000", "epoch": 9.646302250803858, "global_step": 135000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0003, "loss": 2.3019, "step": 1000 }, { "epoch": 0.14, "learning_rate": 0.0002999616623572683, "loss": 0.7533, "step": 2000 }, { "epoch": 0.21, "learning_rate": 0.00029984666902607135, "loss": 0.4132, "step": 3000 }, { "epoch": 0.29, "learning_rate": 0.0002996550787873857, "loss": 0.3262, "step": 4000 }, { "epoch": 0.36, "learning_rate": 0.0002993869895761197, "loss": 0.2811, "step": 5000 }, { "epoch": 0.36, "eval_cer": 0.2446382894995737, "eval_loss": 0.3433528244495392, "eval_runtime": 78.9346, "eval_samples_per_second": 62.71, "eval_steps_per_second": 0.127, "eval_wer": 0.5844444444444444, "step": 5000 }, { "epoch": 0.43, "learning_rate": 0.000299042538431052, "loss": 0.2505, "step": 6000 }, { "epoch": 0.5, "learning_rate": 0.00029862190142478177, "loss": 0.2294, "step": 7000 }, { "epoch": 0.57, "learning_rate": 0.00029812529357372587, "loss": 0.2141, "step": 8000 }, { "epoch": 0.64, "learning_rate": 0.00029755296872820933, "loss": 0.2021, "step": 9000 }, { "epoch": 0.71, "learning_rate": 0.0002969052194427048, "loss": 0.193, "step": 10000 }, { "epoch": 0.71, "eval_cer": 0.1929341291182965, "eval_loss": 0.23424917459487915, "eval_runtime": 78.954, "eval_samples_per_second": 62.695, "eval_steps_per_second": 0.127, "eval_wer": 0.4892929292929293, "step": 10000 }, { "epoch": 0.79, "learning_rate": 0.0002961823768262882, "loss": 0.1847, "step": 11000 }, { "epoch": 0.86, "learning_rate": 0.0002953848103733858, "loss": 0.1772, "step": 12000 }, { "epoch": 0.93, "learning_rate": 0.00029451292777490066, "loss": 0.1716, "step": 13000 }, { "epoch": 1.0, "learning_rate": 0.0002935671747098137, "loss": 0.165, "step": 14000 }, { "epoch": 1.07, "learning_rate": 0.00029254803461736643, "loss": 0.1599, "step": 15000 }, { "epoch": 1.07, "eval_cer": 0.17201198049889596, "eval_loss": 0.18778055906295776, "eval_runtime": 78.8177, "eval_samples_per_second": 62.803, "eval_steps_per_second": 0.127, "eval_wer": 0.43212121212121213, "step": 15000 }, { "epoch": 1.14, "learning_rate": 0.00029145602844994243, "loss": 0.1555, "step": 16000 }, { "epoch": 1.21, "learning_rate": 0.0002902917144067724, "loss": 0.151, "step": 17000 }, { "epoch": 1.29, "learning_rate": 0.00028905568764860047, "loss": 0.1469, "step": 18000 }, { "epoch": 1.36, "learning_rate": 0.00028774857999345685, "loss": 0.144, "step": 19000 }, { "epoch": 1.43, "learning_rate": 0.0002863710595936922, "loss": 0.1406, "step": 20000 }, { "epoch": 1.43, "eval_cer": 0.15764849915830437, "eval_loss": 0.16295142471790314, "eval_runtime": 79.2207, "eval_samples_per_second": 62.484, "eval_steps_per_second": 0.126, "eval_wer": 0.41353535353535353, "step": 20000 }, { "epoch": 1.5, "learning_rate": 0.0002849238305944389, "loss": 0.1368, "step": 21000 }, { "epoch": 1.57, "learning_rate": 0.00028340763277367477, "loss": 0.1345, "step": 22000 }, { "epoch": 1.64, "learning_rate": 0.0002818232411640713, "loss": 0.132, "step": 23000 }, { "epoch": 1.71, "learning_rate": 0.00028017146565682144, "loss": 0.1303, "step": 24000 }, { "epoch": 1.79, "learning_rate": 0.00027845315058764886, "loss": 0.1261, "step": 25000 }, { "epoch": 1.79, "eval_cer": 0.14901292057453924, "eval_loss": 0.14625480771064758, "eval_runtime": 78.8658, "eval_samples_per_second": 62.765, "eval_steps_per_second": 0.127, "eval_wer": 0.39636363636363636, "step": 25000 }, { "epoch": 1.86, "learning_rate": 0.00027666917430520975, "loss": 0.1243, "step": 26000 }, { "epoch": 1.93, "learning_rate": 0.00027482044872210895, "loss": 0.1218, "step": 27000 }, { "epoch": 2.0, "learning_rate": 0.0002729079188487587, "loss": 0.1196, "step": 28000 }, { "epoch": 2.07, "learning_rate": 0.00027093256231031885, "loss": 0.1174, "step": 29000 }, { "epoch": 2.14, "learning_rate": 0.00026889538884696597, "loss": 0.116, "step": 30000 }, { "epoch": 2.14, "eval_cer": 0.1407708620275027, "eval_loss": 0.1319253146648407, "eval_runtime": 79.0974, "eval_samples_per_second": 62.581, "eval_steps_per_second": 0.126, "eval_wer": 0.37696969696969695, "step": 30000 }, { "epoch": 2.22, "learning_rate": 0.0002667974397977457, "loss": 0.114, "step": 31000 }, { "epoch": 2.29, "learning_rate": 0.0002646397875682729, "loss": 0.1122, "step": 32000 }, { "epoch": 2.36, "learning_rate": 0.00026242353508255185, "loss": 0.111, "step": 33000 }, { "epoch": 2.43, "learning_rate": 0.0002601498152191957, "loss": 0.1099, "step": 34000 }, { "epoch": 2.5, "learning_rate": 0.0002578197902323352, "loss": 0.1089, "step": 35000 }, { "epoch": 2.5, "eval_cer": 0.13464943923394765, "eval_loss": 0.12354536354541779, "eval_runtime": 79.0393, "eval_samples_per_second": 62.627, "eval_steps_per_second": 0.127, "eval_wer": 0.36505050505050507, "step": 35000 }, { "epoch": 2.57, "learning_rate": 0.00025543465115751026, "loss": 0.1067, "step": 36000 }, { "epoch": 2.64, "learning_rate": 0.0002529956172028505, "loss": 0.1058, "step": 37000 }, { "epoch": 2.72, "learning_rate": 0.0002505039351258541, "loss": 0.1043, "step": 38000 }, { "epoch": 2.79, "learning_rate": 0.0002479608785960846, "loss": 0.1033, "step": 39000 }, { "epoch": 2.86, "learning_rate": 0.0002453677475441111, "loss": 0.1017, "step": 40000 }, { "epoch": 2.86, "eval_cer": 0.1299927854659933, "eval_loss": 0.11381864547729492, "eval_runtime": 79.0365, "eval_samples_per_second": 62.629, "eval_steps_per_second": 0.127, "eval_wer": 0.3557575757575758, "step": 40000 }, { "epoch": 2.93, "learning_rate": 0.00024272586749702474, "loss": 0.1001, "step": 41000 }, { "epoch": 3.0, "learning_rate": 0.0002400365889008706, "loss": 0.1001, "step": 42000 }, { "epoch": 3.07, "learning_rate": 0.00023730128643034235, "loss": 0.0972, "step": 43000 }, { "epoch": 3.14, "learning_rate": 0.00023452135828609167, "loss": 0.0974, "step": 44000 }, { "epoch": 3.22, "learning_rate": 0.0002316982254800121, "loss": 0.0963, "step": 45000 }, { "epoch": 3.22, "eval_cer": 0.12660414070527534, "eval_loss": 0.10989916324615479, "eval_runtime": 79.0323, "eval_samples_per_second": 62.633, "eval_steps_per_second": 0.127, "eval_wer": 0.3470707070707071, "step": 45000 }, { "epoch": 3.29, "learning_rate": 0.00022883333110886237, "loss": 0.0957, "step": 46000 }, { "epoch": 3.36, "learning_rate": 0.00022592813961660067, "loss": 0.0944, "step": 47000 }, { "epoch": 3.43, "learning_rate": 0.00022298413604580696, "loss": 0.0934, "step": 48000 }, { "epoch": 3.5, "learning_rate": 0.00022000282527857588, "loss": 0.093, "step": 49000 }, { "epoch": 3.57, "learning_rate": 0.0002169857312672683, "loss": 0.0919, "step": 50000 }, { "epoch": 3.57, "eval_cer": 0.12306246037471852, "eval_loss": 0.10183772444725037, "eval_runtime": 79.0109, "eval_samples_per_second": 62.65, "eval_steps_per_second": 0.127, "eval_wer": 0.3395959595959596, "step": 50000 }, { "epoch": 3.64, "learning_rate": 0.00021393439625551483, "loss": 0.0916, "step": 51000 }, { "epoch": 3.72, "learning_rate": 0.00021085037998986924, "loss": 0.0908, "step": 52000 }, { "epoch": 3.79, "learning_rate": 0.00020773525892251514, "loss": 0.0902, "step": 53000 }, { "epoch": 3.86, "learning_rate": 0.00020459062540543316, "loss": 0.0891, "step": 54000 }, { "epoch": 3.93, "learning_rate": 0.00020141808687644067, "loss": 0.0885, "step": 55000 }, { "epoch": 3.93, "eval_cer": 0.12083251349992348, "eval_loss": 0.09825348109006882, "eval_runtime": 79.07, "eval_samples_per_second": 62.603, "eval_steps_per_second": 0.126, "eval_wer": 0.33090909090909093, "step": 55000 }, { "epoch": 4.0, "learning_rate": 0.00019821926503751995, "loss": 0.0878, "step": 56000 }, { "epoch": 4.07, "learning_rate": 0.00019499579502585537, "loss": 0.0867, "step": 57000 }, { "epoch": 4.14, "learning_rate": 0.00019174932457800242, "loss": 0.0856, "step": 58000 }, { "epoch": 4.22, "learning_rate": 0.0001884815131876167, "loss": 0.0855, "step": 59000 }, { "epoch": 4.29, "learning_rate": 0.00018519403125717278, "loss": 0.0843, "step": 60000 }, { "epoch": 4.29, "eval_cer": 0.11809973546708642, "eval_loss": 0.09398272633552551, "eval_runtime": 79.1077, "eval_samples_per_second": 62.573, "eval_steps_per_second": 0.126, "eval_wer": 0.32525252525252524, "step": 60000 }, { "epoch": 4.36, "learning_rate": 0.00018188855924410722, "loss": 0.0846, "step": 61000 }, { "epoch": 4.43, "learning_rate": 0.00017856678680182127, "loss": 0.0836, "step": 62000 }, { "epoch": 4.5, "learning_rate": 0.0001752304119159834, "loss": 0.0832, "step": 63000 }, { "epoch": 4.57, "learning_rate": 0.00017188114003657205, "loss": 0.0828, "step": 64000 }, { "epoch": 4.64, "learning_rate": 0.00016852068320610358, "loss": 0.0823, "step": 65000 }, { "epoch": 4.64, "eval_cer": 0.11665682866574845, "eval_loss": 0.09152530878782272, "eval_runtime": 79.2818, "eval_samples_per_second": 62.435, "eval_steps_per_second": 0.126, "eval_wer": 0.32484848484848483, "step": 65000 }, { "epoch": 4.72, "learning_rate": 0.00016515075918448972, "loss": 0.0818, "step": 66000 }, { "epoch": 4.79, "learning_rate": 0.00016177309057097285, "loss": 0.0806, "step": 67000 }, { "epoch": 4.86, "learning_rate": 0.00015838940392358722, "loss": 0.0807, "step": 68000 }, { "epoch": 4.93, "learning_rate": 0.00015500142887659688, "loss": 0.0799, "step": 69000 }, { "epoch": 5.0, "learning_rate": 0.00015161089725636095, "loss": 0.0795, "step": 70000 }, { "epoch": 5.0, "eval_cer": 0.11567302857392711, "eval_loss": 0.08921054005622864, "eval_runtime": 79.061, "eval_samples_per_second": 62.61, "eval_steps_per_second": 0.126, "eval_wer": 0.321010101010101, "step": 70000 }, { "epoch": 5.07, "learning_rate": 0.00014821954219607845, "loss": 0.0783, "step": 71000 }, { "epoch": 5.14, "learning_rate": 0.0001448290972498651, "loss": 0.0784, "step": 72000 }, { "epoch": 5.22, "learning_rate": 0.00014144129550661485, "loss": 0.0778, "step": 73000 }, { "epoch": 5.29, "learning_rate": 0.0001380578687040995, "loss": 0.0773, "step": 74000 }, { "epoch": 5.36, "learning_rate": 0.00013468054634375843, "loss": 0.0772, "step": 75000 }, { "epoch": 5.36, "eval_cer": 0.11359611726897094, "eval_loss": 0.08604130893945694, "eval_runtime": 79.1221, "eval_samples_per_second": 62.562, "eval_steps_per_second": 0.126, "eval_wer": 0.31777777777777777, "step": 75000 }, { "epoch": 5.43, "learning_rate": 0.00013131105480663235, "loss": 0.0767, "step": 76000 }, { "epoch": 5.5, "learning_rate": 0.000127951116470891, "loss": 0.0766, "step": 77000 }, { "epoch": 5.57, "learning_rate": 0.00012460244883140783, "loss": 0.0757, "step": 78000 }, { "epoch": 5.64, "learning_rate": 0.0001212667636218309, "loss": 0.0753, "step": 79000 }, { "epoch": 5.72, "learning_rate": 0.00011794576593959775, "loss": 0.075, "step": 80000 }, { "epoch": 5.72, "eval_cer": 0.11416453509980105, "eval_loss": 0.08475763350725174, "eval_runtime": 79.0104, "eval_samples_per_second": 62.65, "eval_steps_per_second": 0.127, "eval_wer": 0.317979797979798, "step": 80000 }, { "epoch": 5.79, "learning_rate": 0.00011464115337434394, "loss": 0.075, "step": 81000 }, { "epoch": 5.86, "learning_rate": 0.00011135461514014796, "loss": 0.0742, "step": 82000 }, { "epoch": 5.93, "learning_rate": 0.00010808783121205837, "loss": 0.0743, "step": 83000 }, { "epoch": 6.0, "learning_rate": 0.00010484247146734352, "loss": 0.0736, "step": 84000 }, { "epoch": 6.07, "learning_rate": 0.00010162019483190237, "loss": 0.0725, "step": 85000 }, { "epoch": 6.07, "eval_cer": 0.11407708620275027, "eval_loss": 0.081719771027565, "eval_runtime": 79.1888, "eval_samples_per_second": 62.509, "eval_steps_per_second": 0.126, "eval_wer": 0.31373737373737376, "step": 85000 }, { "epoch": 6.15, "learning_rate": 9.842264843227404e-05, "loss": 0.0727, "step": 86000 }, { "epoch": 6.22, "learning_rate": 9.52514667536784e-05, "loss": 0.0718, "step": 87000 }, { "epoch": 6.29, "learning_rate": 9.210827080451842e-05, "loss": 0.0719, "step": 88000 }, { "epoch": 6.36, "learning_rate": 8.899466728777203e-05, "loss": 0.0718, "step": 89000 }, { "epoch": 6.43, "learning_rate": 8.591224777969557e-05, "loss": 0.0713, "step": 90000 }, { "epoch": 6.43, "eval_cer": 0.11197831267353141, "eval_loss": 0.08096928149461746, "eval_runtime": 78.7279, "eval_samples_per_second": 62.875, "eval_steps_per_second": 0.127, "eval_wer": 0.31353535353535356, "step": 90000 }, { "epoch": 6.5, "learning_rate": 8.286258791626041e-05, "loss": 0.071, "step": 91000 }, { "epoch": 6.57, "learning_rate": 7.984724658773716e-05, "loss": 0.071, "step": 92000 }, { "epoch": 6.65, "learning_rate": 7.686776514184009e-05, "loss": 0.0708, "step": 93000 }, { "epoch": 6.72, "learning_rate": 7.392566659583846e-05, "loss": 0.0705, "step": 94000 }, { "epoch": 6.79, "learning_rate": 7.102245485803813e-05, "loss": 0.07, "step": 95000 }, { "epoch": 6.79, "eval_cer": 0.11123499704859972, "eval_loss": 0.07890674471855164, "eval_runtime": 79.0377, "eval_samples_per_second": 62.628, "eval_steps_per_second": 0.127, "eval_wer": 0.3103030303030303, "step": 95000 }, { "epoch": 6.86, "learning_rate": 6.81596139590308e-05, "loss": 0.0701, "step": 96000 }, { "epoch": 6.93, "learning_rate": 6.533860729310434e-05, "loss": 0.0698, "step": 97000 }, { "epoch": 7.0, "learning_rate": 6.256087687020127e-05, "loss": 0.0698, "step": 98000 }, { "epoch": 7.07, "learning_rate": 5.98278425788092e-05, "loss": 0.0687, "step": 99000 }, { "epoch": 7.15, "learning_rate": 5.71409014601578e-05, "loss": 0.0682, "step": 100000 }, { "epoch": 7.15, "eval_cer": 0.11134430816991321, "eval_loss": 0.07869766652584076, "eval_runtime": 78.9978, "eval_samples_per_second": 62.66, "eval_steps_per_second": 0.127, "eval_wer": 0.3088888888888889, "step": 100000 }, { "epoch": 7.22, "learning_rate": 5.4501426994095876e-05, "loss": 0.0682, "step": 101000 }, { "epoch": 7.29, "learning_rate": 5.191076839701103e-05, "loss": 0.0681, "step": 102000 }, { "epoch": 7.36, "learning_rate": 4.9370249932153075e-05, "loss": 0.0681, "step": 103000 }, { "epoch": 7.43, "learning_rate": 4.6881170232712164e-05, "loss": 0.0682, "step": 104000 }, { "epoch": 7.5, "learning_rate": 4.444480163799822e-05, "loss": 0.0681, "step": 105000 }, { "epoch": 7.5, "eval_cer": 0.11066657921776962, "eval_loss": 0.07721372693777084, "eval_runtime": 79.1073, "eval_samples_per_second": 62.573, "eval_steps_per_second": 0.126, "eval_wer": 0.30808080808080807, "step": 105000 }, { "epoch": 7.57, "learning_rate": 4.2062389543061265e-05, "loss": 0.0673, "step": 106000 }, { "epoch": 7.65, "learning_rate": 3.9735151762084384e-05, "loss": 0.0674, "step": 107000 }, { "epoch": 7.72, "learning_rate": 3.746427790587557e-05, "loss": 0.0666, "step": 108000 }, { "epoch": 7.79, "learning_rate": 3.525092877377602e-05, "loss": 0.0674, "step": 109000 }, { "epoch": 7.86, "learning_rate": 3.309623576029597e-05, "loss": 0.0666, "step": 110000 }, { "epoch": 7.86, "eval_cer": 0.11116941037581164, "eval_loss": 0.07652007043361664, "eval_runtime": 78.7656, "eval_samples_per_second": 62.845, "eval_steps_per_second": 0.127, "eval_wer": 0.30686868686868685, "step": 110000 }, { "epoch": 7.93, "learning_rate": 3.1001300276781274e-05, "loss": 0.0668, "step": 111000 }, { "epoch": 8.0, "learning_rate": 2.8967193188406938e-05, "loss": 0.0663, "step": 112000 }, { "epoch": 8.07, "learning_rate": 2.699495426678389e-05, "loss": 0.066, "step": 113000 }, { "epoch": 8.15, "learning_rate": 2.5085591658461056e-05, "loss": 0.0658, "step": 114000 }, { "epoch": 8.22, "learning_rate": 2.3240081369591984e-05, "loss": 0.0656, "step": 115000 }, { "epoch": 8.22, "eval_cer": 0.11088520146039658, "eval_loss": 0.07624918967485428, "eval_runtime": 78.9004, "eval_samples_per_second": 62.737, "eval_steps_per_second": 0.127, "eval_wer": 0.30727272727272725, "step": 115000 }, { "epoch": 8.29, "learning_rate": 2.1459366767031522e-05, "loss": 0.0656, "step": 116000 }, { "epoch": 8.36, "learning_rate": 1.9744358096116225e-05, "loss": 0.0654, "step": 117000 }, { "epoch": 8.43, "learning_rate": 1.8095932015375496e-05, "loss": 0.0655, "step": 118000 }, { "epoch": 8.5, "learning_rate": 1.65149311484114e-05, "loss": 0.0651, "step": 119000 }, { "epoch": 8.57, "learning_rate": 1.500216365317587e-05, "loss": 0.0655, "step": 120000 }, { "epoch": 8.57, "eval_cer": 0.11158479263680286, "eval_loss": 0.07576470077037811, "eval_runtime": 79.0474, "eval_samples_per_second": 62.621, "eval_steps_per_second": 0.127, "eval_wer": 0.30808080808080807, "step": 120000 }, { "epoch": 8.65, "learning_rate": 1.355840280886582e-05, "loss": 0.0652, "step": 121000 }, { "epoch": 8.72, "learning_rate": 1.2184386620647097e-05, "loss": 0.0656, "step": 122000 }, { "epoch": 8.79, "learning_rate": 1.0880817442409478e-05, "loss": 0.0655, "step": 123000 }, { "epoch": 8.86, "learning_rate": 9.648361617745371e-06, "loss": 0.0651, "step": 124000 }, { "epoch": 8.93, "learning_rate": 8.487649139335962e-06, "loss": 0.0652, "step": 125000 }, { "epoch": 8.93, "eval_cer": 0.1107321658905577, "eval_loss": 0.07536973804235458, "eval_runtime": 79.0386, "eval_samples_per_second": 62.628, "eval_steps_per_second": 0.127, "eval_wer": 0.30707070707070705, "step": 125000 }, { "epoch": 9.0, "learning_rate": 7.399273326918692e-06, "loss": 0.065, "step": 126000 }, { "epoch": 9.07, "learning_rate": 6.383790524001009e-06, "loss": 0.0645, "step": 127000 }, { "epoch": 9.15, "learning_rate": 5.441719813474849e-06, "loss": 0.0647, "step": 128000 }, { "epoch": 9.22, "learning_rate": 4.57354275227797e-06, "loss": 0.0649, "step": 129000 }, { "epoch": 9.29, "learning_rate": 3.7797031252369767e-06, "loss": 0.0642, "step": 130000 }, { "epoch": 9.29, "eval_cer": 0.11057913032071882, "eval_loss": 0.07509743422269821, "eval_runtime": 78.953, "eval_samples_per_second": 62.695, "eval_steps_per_second": 0.127, "eval_wer": 0.30747474747474746, "step": 130000 }, { "epoch": 9.36, "learning_rate": 3.0606067182186776e-06, "loss": 0.0642, "step": 131000 }, { "epoch": 9.43, "learning_rate": 2.4166211107049584e-06, "loss": 0.0644, "step": 132000 }, { "epoch": 9.5, "learning_rate": 1.8480754878977489e-06, "loss": 0.0644, "step": 133000 }, { "epoch": 9.57, "learning_rate": 1.3552604724498928e-06, "loss": 0.0648, "step": 134000 }, { "epoch": 9.65, "learning_rate": 9.384279759080127e-07, "loss": 0.0645, "step": 135000 }, { "epoch": 9.65, "eval_cer": 0.1107758903390831, "eval_loss": 0.07504051923751831, "eval_runtime": 78.9794, "eval_samples_per_second": 62.675, "eval_steps_per_second": 0.127, "eval_wer": 0.30787878787878786, "step": 135000 } ], "max_steps": 139950, "num_train_epochs": 10, "total_flos": 1.1962625697651917e+17, "trial_name": null, "trial_params": null }