{ "best_metric": 0.6378720998764038, "best_model_checkpoint": "ai-light-dance_drums_ft_pretrain_wav2vec2-base-new_onset-idmt-mdb-enst-2/checkpoint-700", "epoch": 99.99290780141844, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.28, "learning_rate": 7e-05, "loss": 2.6822, "step": 10 }, { "epoch": 0.57, "learning_rate": 0.00016999999999999999, "loss": 1.5659, "step": 20 }, { "epoch": 0.85, "learning_rate": 0.00026, "loss": 1.4226, "step": 30 }, { "epoch": 0.99, "eval_loss": 2.0435445308685303, "eval_runtime": 8.6793, "eval_samples_per_second": 7.259, "eval_steps_per_second": 1.843, "eval_wer": 0.4154365246731337, "step": 35 }, { "epoch": 1.14, "learning_rate": 0.00029948126801152735, "loss": 1.0316, "step": 40 }, { "epoch": 1.43, "learning_rate": 0.0002986167146974063, "loss": 1.0479, "step": 50 }, { "epoch": 1.71, "learning_rate": 0.0002977521613832853, "loss": 1.0524, "step": 60 }, { "epoch": 1.99, "learning_rate": 0.00029688760806916427, "loss": 0.8744, "step": 70 }, { "epoch": 1.99, "eval_loss": 1.71934175491333, "eval_runtime": 9.0494, "eval_samples_per_second": 6.962, "eval_steps_per_second": 1.768, "eval_wer": 0.43821172501054406, "step": 70 }, { "epoch": 2.28, "learning_rate": 0.0002960230547550432, "loss": 0.8641, "step": 80 }, { "epoch": 2.57, "learning_rate": 0.00029515850144092216, "loss": 0.8469, "step": 90 }, { "epoch": 2.85, "learning_rate": 0.00029429394812680113, "loss": 0.9474, "step": 100 }, { "epoch": 2.99, "eval_loss": 1.7853245735168457, "eval_runtime": 9.2275, "eval_samples_per_second": 6.827, "eval_steps_per_second": 1.734, "eval_wer": 0.4373681990721215, "step": 105 }, { "epoch": 3.14, "learning_rate": 0.00029342939481268005, "loss": 0.9012, "step": 110 }, { "epoch": 3.43, "learning_rate": 0.0002925648414985591, "loss": 0.9915, "step": 120 }, { "epoch": 3.71, "learning_rate": 0.000291700288184438, "loss": 0.7743, "step": 130 }, { "epoch": 3.99, "learning_rate": 0.00029083573487031697, "loss": 0.8316, "step": 140 }, { "epoch": 3.99, "eval_loss": 1.2827332019805908, "eval_runtime": 9.447, "eval_samples_per_second": 6.669, "eval_steps_per_second": 1.694, "eval_wer": 0.4306199915647406, "step": 140 }, { "epoch": 4.28, "learning_rate": 0.00028997118155619595, "loss": 0.7751, "step": 150 }, { "epoch": 4.57, "learning_rate": 0.0002891066282420749, "loss": 0.8104, "step": 160 }, { "epoch": 4.85, "learning_rate": 0.00028824207492795384, "loss": 0.8336, "step": 170 }, { "epoch": 4.99, "eval_loss": 1.0676425695419312, "eval_runtime": 8.8457, "eval_samples_per_second": 7.122, "eval_steps_per_second": 1.809, "eval_wer": 0.40404892450442853, "step": 175 }, { "epoch": 5.14, "learning_rate": 0.00028737752161383287, "loss": 0.7711, "step": 180 }, { "epoch": 5.43, "learning_rate": 0.0002865129682997118, "loss": 0.6393, "step": 190 }, { "epoch": 5.71, "learning_rate": 0.00028564841498559076, "loss": 0.7828, "step": 200 }, { "epoch": 5.99, "learning_rate": 0.00028478386167146973, "loss": 0.7345, "step": 210 }, { "epoch": 5.99, "eval_loss": 1.5363637208938599, "eval_runtime": 8.9421, "eval_samples_per_second": 7.045, "eval_steps_per_second": 1.789, "eval_wer": 0.4264023618726276, "step": 210 }, { "epoch": 6.28, "learning_rate": 0.00028391930835734865, "loss": 0.7525, "step": 220 }, { "epoch": 6.57, "learning_rate": 0.0002830547550432276, "loss": 0.6614, "step": 230 }, { "epoch": 6.85, "learning_rate": 0.0002821902017291066, "loss": 0.6666, "step": 240 }, { "epoch": 6.99, "eval_loss": 1.428429365158081, "eval_runtime": 8.8874, "eval_samples_per_second": 7.089, "eval_steps_per_second": 1.8, "eval_wer": 0.4584563475326866, "step": 245 }, { "epoch": 7.14, "learning_rate": 0.00028132564841498557, "loss": 0.7812, "step": 250 }, { "epoch": 7.43, "learning_rate": 0.00028046109510086454, "loss": 0.6833, "step": 260 }, { "epoch": 7.71, "learning_rate": 0.0002795965417867435, "loss": 0.7328, "step": 270 }, { "epoch": 7.99, "learning_rate": 0.00027873198847262244, "loss": 0.6677, "step": 280 }, { "epoch": 7.99, "eval_loss": 0.9474982023239136, "eval_runtime": 8.8866, "eval_samples_per_second": 7.089, "eval_steps_per_second": 1.8, "eval_wer": 0.40025305778152676, "step": 280 }, { "epoch": 8.28, "learning_rate": 0.0002778674351585014, "loss": 0.7165, "step": 290 }, { "epoch": 8.57, "learning_rate": 0.0002770028818443804, "loss": 0.667, "step": 300 }, { "epoch": 8.85, "learning_rate": 0.00027613832853025936, "loss": 0.6779, "step": 310 }, { "epoch": 8.99, "eval_loss": 1.1171544790267944, "eval_runtime": 9.1402, "eval_samples_per_second": 6.893, "eval_steps_per_second": 1.751, "eval_wer": 0.42091944327288067, "step": 315 }, { "epoch": 9.14, "learning_rate": 0.0002752737752161383, "loss": 0.7132, "step": 320 }, { "epoch": 9.43, "learning_rate": 0.00027440922190201725, "loss": 0.7306, "step": 330 }, { "epoch": 9.71, "learning_rate": 0.0002735446685878962, "loss": 0.6461, "step": 340 }, { "epoch": 9.99, "learning_rate": 0.0002726801152737752, "loss": 0.6503, "step": 350 }, { "epoch": 9.99, "eval_loss": 0.8998618721961975, "eval_runtime": 8.8852, "eval_samples_per_second": 7.09, "eval_steps_per_second": 1.801, "eval_wer": 0.3833825390130747, "step": 350 }, { "epoch": 10.28, "learning_rate": 0.00027181556195965417, "loss": 0.6801, "step": 360 }, { "epoch": 10.57, "learning_rate": 0.0002709510086455331, "loss": 0.6139, "step": 370 }, { "epoch": 10.85, "learning_rate": 0.00027008645533141206, "loss": 0.6159, "step": 380 }, { "epoch": 10.99, "eval_loss": 1.150137186050415, "eval_runtime": 8.854, "eval_samples_per_second": 7.115, "eval_steps_per_second": 1.807, "eval_wer": 0.4386334879797554, "step": 385 }, { "epoch": 11.14, "learning_rate": 0.00026922190201729104, "loss": 0.8074, "step": 390 }, { "epoch": 11.43, "learning_rate": 0.00026835734870317, "loss": 0.6521, "step": 400 }, { "epoch": 11.71, "learning_rate": 0.000267492795389049, "loss": 0.6608, "step": 410 }, { "epoch": 11.99, "learning_rate": 0.00026662824207492796, "loss": 0.6831, "step": 420 }, { "epoch": 11.99, "eval_loss": 1.0860137939453125, "eval_runtime": 8.8923, "eval_samples_per_second": 7.085, "eval_steps_per_second": 1.799, "eval_wer": 0.38253901307465205, "step": 420 }, { "epoch": 12.28, "learning_rate": 0.0002657636887608069, "loss": 0.6523, "step": 430 }, { "epoch": 12.57, "learning_rate": 0.00026489913544668585, "loss": 0.5839, "step": 440 }, { "epoch": 12.85, "learning_rate": 0.0002640345821325648, "loss": 0.5959, "step": 450 }, { "epoch": 12.99, "eval_loss": 0.9410010576248169, "eval_runtime": 8.8805, "eval_samples_per_second": 7.094, "eval_steps_per_second": 1.802, "eval_wer": 0.4044706874736398, "step": 455 }, { "epoch": 13.14, "learning_rate": 0.00026317002881844374, "loss": 0.6781, "step": 460 }, { "epoch": 13.43, "learning_rate": 0.00026230547550432277, "loss": 0.6091, "step": 470 }, { "epoch": 13.71, "learning_rate": 0.0002614409221902017, "loss": 0.6359, "step": 480 }, { "epoch": 13.99, "learning_rate": 0.00026057636887608066, "loss": 0.7154, "step": 490 }, { "epoch": 13.99, "eval_loss": 1.0462627410888672, "eval_runtime": 9.045, "eval_samples_per_second": 6.965, "eval_steps_per_second": 1.769, "eval_wer": 0.38211725010544073, "step": 490 }, { "epoch": 14.28, "learning_rate": 0.00025971181556195964, "loss": 0.6386, "step": 500 }, { "epoch": 14.57, "learning_rate": 0.0002588472622478386, "loss": 0.5766, "step": 510 }, { "epoch": 14.85, "learning_rate": 0.00025798270893371753, "loss": 0.6094, "step": 520 }, { "epoch": 14.99, "eval_loss": 0.8598027229309082, "eval_runtime": 8.8755, "eval_samples_per_second": 7.098, "eval_steps_per_second": 1.803, "eval_wer": 0.39645719105862504, "step": 525 }, { "epoch": 15.14, "learning_rate": 0.00025711815561959656, "loss": 0.6392, "step": 530 }, { "epoch": 15.43, "learning_rate": 0.0002562536023054755, "loss": 0.6047, "step": 540 }, { "epoch": 15.71, "learning_rate": 0.0002554755043227665, "loss": 1.3821, "step": 550 }, { "epoch": 15.99, "learning_rate": 0.00025461095100864553, "loss": 0.6929, "step": 560 }, { "epoch": 15.99, "eval_loss": 0.9493783116340637, "eval_runtime": 8.8602, "eval_samples_per_second": 7.11, "eval_steps_per_second": 1.806, "eval_wer": 0.39308308730493463, "step": 560 }, { "epoch": 16.28, "learning_rate": 0.00025374639769452445, "loss": 0.6521, "step": 570 }, { "epoch": 16.57, "learning_rate": 0.00025288184438040343, "loss": 0.6791, "step": 580 }, { "epoch": 16.85, "learning_rate": 0.0002520172910662824, "loss": 0.7627, "step": 590 }, { "epoch": 16.99, "eval_loss": 0.8059821128845215, "eval_runtime": 8.8362, "eval_samples_per_second": 7.13, "eval_steps_per_second": 1.811, "eval_wer": 0.39477013918177983, "step": 595 }, { "epoch": 17.14, "learning_rate": 0.0002511527377521614, "loss": 0.6268, "step": 600 }, { "epoch": 17.43, "learning_rate": 0.0002502881844380403, "loss": 0.6496, "step": 610 }, { "epoch": 17.71, "learning_rate": 0.0002494236311239193, "loss": 0.6172, "step": 620 }, { "epoch": 17.99, "learning_rate": 0.00024855907780979824, "loss": 0.601, "step": 630 }, { "epoch": 17.99, "eval_loss": 0.989000678062439, "eval_runtime": 8.8942, "eval_samples_per_second": 7.083, "eval_steps_per_second": 1.799, "eval_wer": 0.39645719105862504, "step": 630 }, { "epoch": 18.28, "learning_rate": 0.0002476945244956772, "loss": 0.6047, "step": 640 }, { "epoch": 18.57, "learning_rate": 0.0002468299711815562, "loss": 0.5297, "step": 650 }, { "epoch": 18.85, "learning_rate": 0.0002459654178674351, "loss": 0.546, "step": 660 }, { "epoch": 18.99, "eval_loss": 0.8058700561523438, "eval_runtime": 9.0371, "eval_samples_per_second": 6.971, "eval_steps_per_second": 1.77, "eval_wer": 0.39898776887389287, "step": 665 }, { "epoch": 19.14, "learning_rate": 0.0002451008645533141, "loss": 0.5887, "step": 670 }, { "epoch": 19.43, "learning_rate": 0.00024423631123919305, "loss": 0.5546, "step": 680 }, { "epoch": 19.71, "learning_rate": 0.00024337175792507203, "loss": 0.6092, "step": 690 }, { "epoch": 19.99, "learning_rate": 0.00024250720461095097, "loss": 0.5222, "step": 700 }, { "epoch": 19.99, "eval_loss": 0.6378720998764038, "eval_runtime": 8.9217, "eval_samples_per_second": 7.061, "eval_steps_per_second": 1.793, "eval_wer": 0.37916490932096164, "step": 700 }, { "epoch": 20.28, "learning_rate": 0.00024164265129682995, "loss": 0.6241, "step": 710 }, { "epoch": 20.57, "learning_rate": 0.00024077809798270892, "loss": 0.6277, "step": 720 }, { "epoch": 20.85, "learning_rate": 0.0002399135446685879, "loss": 0.5802, "step": 730 }, { "epoch": 20.99, "eval_loss": 0.699506402015686, "eval_runtime": 8.8339, "eval_samples_per_second": 7.132, "eval_steps_per_second": 1.811, "eval_wer": 0.3660902572754112, "step": 735 }, { "epoch": 21.14, "learning_rate": 0.00023904899135446684, "loss": 0.6758, "step": 740 }, { "epoch": 21.43, "learning_rate": 0.0002381844380403458, "loss": 0.5938, "step": 750 }, { "epoch": 21.71, "learning_rate": 0.00023731988472622476, "loss": 0.6277, "step": 760 }, { "epoch": 21.99, "learning_rate": 0.0002364553314121037, "loss": 0.5731, "step": 770 }, { "epoch": 21.99, "eval_loss": 0.8405329585075378, "eval_runtime": 8.8637, "eval_samples_per_second": 7.108, "eval_steps_per_second": 1.805, "eval_wer": 0.3606073386756643, "step": 770 }, { "epoch": 22.28, "learning_rate": 0.00023559077809798268, "loss": 0.5488, "step": 780 }, { "epoch": 22.57, "learning_rate": 0.00023472622478386165, "loss": 0.5718, "step": 790 }, { "epoch": 22.85, "learning_rate": 0.00023386167146974063, "loss": 0.5462, "step": 800 }, { "epoch": 22.99, "eval_loss": 0.6666640639305115, "eval_runtime": 8.9034, "eval_samples_per_second": 7.076, "eval_steps_per_second": 1.797, "eval_wer": 0.39645719105862504, "step": 805 }, { "epoch": 23.14, "learning_rate": 0.00023299711815561957, "loss": 0.5111, "step": 810 }, { "epoch": 23.43, "learning_rate": 0.00023213256484149854, "loss": 0.5275, "step": 820 }, { "epoch": 23.71, "learning_rate": 0.0002312680115273775, "loss": 0.5692, "step": 830 }, { "epoch": 23.99, "learning_rate": 0.00023040345821325646, "loss": 0.6057, "step": 840 }, { "epoch": 23.99, "eval_loss": 0.8396028280258179, "eval_runtime": 9.0837, "eval_samples_per_second": 6.935, "eval_steps_per_second": 1.761, "eval_wer": 0.3762125685364825, "step": 840 }, { "epoch": 24.28, "learning_rate": 0.0002295389048991354, "loss": 0.613, "step": 850 }, { "epoch": 24.57, "learning_rate": 0.0002286743515850144, "loss": 0.4814, "step": 860 }, { "epoch": 24.85, "learning_rate": 0.00022780979827089336, "loss": 0.5323, "step": 870 }, { "epoch": 24.99, "eval_loss": 0.9054490327835083, "eval_runtime": 8.8422, "eval_samples_per_second": 7.125, "eval_steps_per_second": 1.81, "eval_wer": 0.39519190215099115, "step": 875 }, { "epoch": 25.14, "learning_rate": 0.0002269452449567723, "loss": 0.5585, "step": 880 }, { "epoch": 25.43, "learning_rate": 0.00022608069164265128, "loss": 0.5142, "step": 890 }, { "epoch": 25.71, "learning_rate": 0.00022521613832853022, "loss": 0.5325, "step": 900 }, { "epoch": 25.99, "learning_rate": 0.0002243515850144092, "loss": 0.683, "step": 910 }, { "epoch": 25.99, "eval_loss": 0.689756453037262, "eval_runtime": 8.8398, "eval_samples_per_second": 7.127, "eval_steps_per_second": 1.81, "eval_wer": 0.40615773935048505, "step": 910 }, { "epoch": 26.28, "learning_rate": 0.00022348703170028814, "loss": 0.5484, "step": 920 }, { "epoch": 26.57, "learning_rate": 0.00022262247838616714, "loss": 0.4704, "step": 930 }, { "epoch": 26.85, "learning_rate": 0.0002217579250720461, "loss": 0.525, "step": 940 }, { "epoch": 26.99, "eval_loss": 0.7244584560394287, "eval_runtime": 8.8634, "eval_samples_per_second": 7.108, "eval_steps_per_second": 1.805, "eval_wer": 0.3884436946436103, "step": 945 }, { "epoch": 27.14, "learning_rate": 0.00022089337175792506, "loss": 0.5174, "step": 950 }, { "epoch": 27.43, "learning_rate": 0.000220028818443804, "loss": 0.5248, "step": 960 }, { "epoch": 27.71, "learning_rate": 0.00021916426512968298, "loss": 0.5203, "step": 970 }, { "epoch": 27.99, "learning_rate": 0.00021829971181556193, "loss": 0.4885, "step": 980 }, { "epoch": 27.99, "eval_loss": 0.807603120803833, "eval_runtime": 8.8102, "eval_samples_per_second": 7.151, "eval_steps_per_second": 1.816, "eval_wer": 0.4048924504428511, "step": 980 }, { "epoch": 28.28, "learning_rate": 0.00021743515850144088, "loss": 0.5582, "step": 990 }, { "epoch": 28.57, "learning_rate": 0.00021657060518731988, "loss": 0.4763, "step": 1000 }, { "epoch": 28.85, "learning_rate": 0.00021570605187319882, "loss": 0.4653, "step": 1010 }, { "epoch": 28.99, "eval_loss": 0.8100489377975464, "eval_runtime": 8.8831, "eval_samples_per_second": 7.092, "eval_steps_per_second": 1.801, "eval_wer": 0.38380430198228593, "step": 1015 }, { "epoch": 29.14, "learning_rate": 0.0002148414985590778, "loss": 0.4895, "step": 1020 }, { "epoch": 29.43, "learning_rate": 0.00021397694524495674, "loss": 0.4331, "step": 1030 }, { "epoch": 29.71, "learning_rate": 0.00021311239193083572, "loss": 0.5435, "step": 1040 }, { "epoch": 29.99, "learning_rate": 0.00021224783861671466, "loss": 0.4827, "step": 1050 }, { "epoch": 29.99, "eval_loss": 0.7246588468551636, "eval_runtime": 8.886, "eval_samples_per_second": 7.09, "eval_steps_per_second": 1.801, "eval_wer": 0.38633487979755377, "step": 1050 }, { "epoch": 30.28, "learning_rate": 0.00021138328530259366, "loss": 0.4931, "step": 1060 }, { "epoch": 30.57, "learning_rate": 0.0002105187319884726, "loss": 0.5082, "step": 1070 }, { "epoch": 30.85, "learning_rate": 0.00020965417867435158, "loss": 0.4839, "step": 1080 }, { "epoch": 30.99, "eval_loss": 0.700883150100708, "eval_runtime": 8.8179, "eval_samples_per_second": 7.145, "eval_steps_per_second": 1.814, "eval_wer": 0.3816954871362294, "step": 1085 }, { "epoch": 31.14, "learning_rate": 0.00020878962536023053, "loss": 0.6065, "step": 1090 }, { "epoch": 31.43, "learning_rate": 0.00020792507204610947, "loss": 0.6116, "step": 1100 }, { "epoch": 31.71, "learning_rate": 0.00020706051873198845, "loss": 0.494, "step": 1110 }, { "epoch": 31.99, "learning_rate": 0.0002061959654178674, "loss": 0.4982, "step": 1120 }, { "epoch": 31.99, "eval_loss": 0.7636587023735046, "eval_runtime": 8.8569, "eval_samples_per_second": 7.113, "eval_steps_per_second": 1.806, "eval_wer": 0.39139603542808943, "step": 1120 }, { "epoch": 32.28, "learning_rate": 0.0002053314121037464, "loss": 0.5046, "step": 1130 }, { "epoch": 32.57, "learning_rate": 0.00020446685878962534, "loss": 0.5496, "step": 1140 }, { "epoch": 32.85, "learning_rate": 0.00020360230547550431, "loss": 0.6105, "step": 1150 }, { "epoch": 32.99, "eval_loss": 0.7342580556869507, "eval_runtime": 8.8973, "eval_samples_per_second": 7.081, "eval_steps_per_second": 1.798, "eval_wer": 0.39139603542808943, "step": 1155 }, { "epoch": 33.14, "learning_rate": 0.00020273775216138326, "loss": 0.4872, "step": 1160 }, { "epoch": 33.43, "learning_rate": 0.00020187319884726223, "loss": 0.4502, "step": 1170 }, { "epoch": 33.71, "learning_rate": 0.00020100864553314118, "loss": 0.7582, "step": 1180 }, { "epoch": 33.99, "learning_rate": 0.00020014409221902018, "loss": 0.4936, "step": 1190 }, { "epoch": 33.99, "eval_loss": 0.7389687299728394, "eval_runtime": 8.8292, "eval_samples_per_second": 7.135, "eval_steps_per_second": 1.812, "eval_wer": 0.3762125685364825, "step": 1190 }, { "epoch": 34.28, "learning_rate": 0.00019927953890489913, "loss": 0.4426, "step": 1200 }, { "epoch": 34.57, "learning_rate": 0.0001984149855907781, "loss": 0.5449, "step": 1210 }, { "epoch": 34.85, "learning_rate": 0.00019755043227665705, "loss": 0.4674, "step": 1220 }, { "epoch": 34.99, "eval_loss": 0.6723992228507996, "eval_runtime": 8.875, "eval_samples_per_second": 7.099, "eval_steps_per_second": 1.803, "eval_wer": 0.35807676086039647, "step": 1225 }, { "epoch": 35.14, "learning_rate": 0.000196685878962536, "loss": 0.4659, "step": 1230 }, { "epoch": 35.43, "learning_rate": 0.00019582132564841497, "loss": 0.5037, "step": 1240 }, { "epoch": 35.71, "learning_rate": 0.0001949567723342939, "loss": 0.4473, "step": 1250 }, { "epoch": 35.99, "learning_rate": 0.0001940922190201729, "loss": 0.4677, "step": 1260 }, { "epoch": 35.99, "eval_loss": 0.6729607582092285, "eval_runtime": 9.0573, "eval_samples_per_second": 6.956, "eval_steps_per_second": 1.767, "eval_wer": 0.3487979755377478, "step": 1260 }, { "epoch": 36.28, "learning_rate": 0.00019322766570605186, "loss": 0.4732, "step": 1270 }, { "epoch": 36.57, "learning_rate": 0.00019236311239193083, "loss": 0.3948, "step": 1280 }, { "epoch": 36.85, "learning_rate": 0.00019149855907780978, "loss": 0.516, "step": 1290 }, { "epoch": 36.99, "eval_loss": 0.6956349611282349, "eval_runtime": 8.8175, "eval_samples_per_second": 7.145, "eval_steps_per_second": 1.815, "eval_wer": 0.3728384647827921, "step": 1295 }, { "epoch": 37.14, "learning_rate": 0.00019063400576368875, "loss": 0.4974, "step": 1300 }, { "epoch": 37.43, "learning_rate": 0.0001897694524495677, "loss": 0.4605, "step": 1310 }, { "epoch": 37.71, "learning_rate": 0.00018890489913544667, "loss": 0.5494, "step": 1320 }, { "epoch": 37.99, "learning_rate": 0.00018804034582132565, "loss": 0.4507, "step": 1330 }, { "epoch": 37.99, "eval_loss": 0.6482787132263184, "eval_runtime": 8.8046, "eval_samples_per_second": 7.155, "eval_steps_per_second": 1.817, "eval_wer": 0.3614508646140869, "step": 1330 }, { "epoch": 38.28, "learning_rate": 0.0001871757925072046, "loss": 0.4868, "step": 1340 }, { "epoch": 38.57, "learning_rate": 0.00018631123919308357, "loss": 0.3934, "step": 1350 }, { "epoch": 38.85, "learning_rate": 0.0001854466858789625, "loss": 0.4207, "step": 1360 }, { "epoch": 38.99, "eval_loss": 0.7718012928962708, "eval_runtime": 8.854, "eval_samples_per_second": 7.115, "eval_steps_per_second": 1.807, "eval_wer": 0.34837621256853646, "step": 1365 }, { "epoch": 39.14, "learning_rate": 0.00018458213256484148, "loss": 0.4837, "step": 1370 }, { "epoch": 39.43, "learning_rate": 0.00018371757925072043, "loss": 0.4663, "step": 1380 }, { "epoch": 39.71, "learning_rate": 0.0001828530259365994, "loss": 0.4572, "step": 1390 }, { "epoch": 39.99, "learning_rate": 0.00018198847262247838, "loss": 0.4803, "step": 1400 }, { "epoch": 39.99, "eval_loss": 0.831644594669342, "eval_runtime": 8.7883, "eval_samples_per_second": 7.169, "eval_steps_per_second": 1.821, "eval_wer": 0.3774778574441164, "step": 1400 }, { "epoch": 40.28, "learning_rate": 0.00018112391930835735, "loss": 0.5059, "step": 1410 }, { "epoch": 40.57, "learning_rate": 0.0001802593659942363, "loss": 0.4758, "step": 1420 }, { "epoch": 40.85, "learning_rate": 0.00017939481268011527, "loss": 0.3946, "step": 1430 }, { "epoch": 40.99, "eval_loss": 0.8322010636329651, "eval_runtime": 8.8486, "eval_samples_per_second": 7.12, "eval_steps_per_second": 1.808, "eval_wer": 0.3568114719527625, "step": 1435 }, { "epoch": 41.14, "learning_rate": 0.00017853025936599422, "loss": 0.4533, "step": 1440 }, { "epoch": 41.43, "learning_rate": 0.00017766570605187316, "loss": 0.4331, "step": 1450 }, { "epoch": 41.71, "learning_rate": 0.00017680115273775214, "loss": 0.4563, "step": 1460 }, { "epoch": 41.99, "learning_rate": 0.0001759365994236311, "loss": 0.411, "step": 1470 }, { "epoch": 41.99, "eval_loss": 0.9933066368103027, "eval_runtime": 8.8766, "eval_samples_per_second": 7.097, "eval_steps_per_second": 1.802, "eval_wer": 0.3707296499367356, "step": 1470 }, { "epoch": 42.28, "learning_rate": 0.00017507204610951008, "loss": 0.4233, "step": 1480 }, { "epoch": 42.57, "learning_rate": 0.00017420749279538903, "loss": 0.4351, "step": 1490 }, { "epoch": 42.85, "learning_rate": 0.000173342939481268, "loss": 0.4405, "step": 1500 }, { "epoch": 42.99, "eval_loss": 0.8789033889770508, "eval_runtime": 8.9051, "eval_samples_per_second": 7.075, "eval_steps_per_second": 1.797, "eval_wer": 0.3943483762125685, "step": 1505 }, { "epoch": 43.14, "learning_rate": 0.00017247838616714695, "loss": 0.4044, "step": 1510 }, { "epoch": 43.43, "learning_rate": 0.00017161383285302592, "loss": 0.4411, "step": 1520 }, { "epoch": 43.71, "learning_rate": 0.00017074927953890487, "loss": 0.4061, "step": 1530 }, { "epoch": 43.99, "learning_rate": 0.00016988472622478387, "loss": 0.5124, "step": 1540 }, { "epoch": 43.99, "eval_loss": 0.9030116200447083, "eval_runtime": 8.8171, "eval_samples_per_second": 7.145, "eval_steps_per_second": 1.815, "eval_wer": 0.3707296499367356, "step": 1540 }, { "epoch": 44.28, "learning_rate": 0.00016902017291066282, "loss": 0.4745, "step": 1550 }, { "epoch": 44.57, "learning_rate": 0.00016815561959654176, "loss": 0.4145, "step": 1560 }, { "epoch": 44.85, "learning_rate": 0.00016729106628242074, "loss": 0.5959, "step": 1570 }, { "epoch": 44.99, "eval_loss": 0.7809452414512634, "eval_runtime": 8.9655, "eval_samples_per_second": 7.027, "eval_steps_per_second": 1.785, "eval_wer": 0.39477013918177983, "step": 1575 }, { "epoch": 45.14, "learning_rate": 0.00016642651296829968, "loss": 0.4527, "step": 1580 }, { "epoch": 45.43, "learning_rate": 0.00016556195965417866, "loss": 0.3988, "step": 1590 }, { "epoch": 45.71, "learning_rate": 0.0001646974063400576, "loss": 0.4175, "step": 1600 }, { "epoch": 45.99, "learning_rate": 0.0001638328530259366, "loss": 0.3841, "step": 1610 }, { "epoch": 45.99, "eval_loss": 0.7716331481933594, "eval_runtime": 8.8671, "eval_samples_per_second": 7.105, "eval_steps_per_second": 1.804, "eval_wer": 0.39645719105862504, "step": 1610 }, { "epoch": 46.28, "learning_rate": 0.00016296829971181555, "loss": 0.4015, "step": 1620 }, { "epoch": 46.57, "learning_rate": 0.00016210374639769452, "loss": 0.453, "step": 1630 }, { "epoch": 46.85, "learning_rate": 0.00016123919308357347, "loss": 0.3975, "step": 1640 }, { "epoch": 46.99, "eval_loss": 0.7064468860626221, "eval_runtime": 8.9124, "eval_samples_per_second": 7.069, "eval_steps_per_second": 1.795, "eval_wer": 0.39308308730493463, "step": 1645 }, { "epoch": 47.14, "learning_rate": 0.00016037463976945244, "loss": 0.5041, "step": 1650 }, { "epoch": 47.43, "learning_rate": 0.0001595100864553314, "loss": 0.4389, "step": 1660 }, { "epoch": 47.71, "learning_rate": 0.00015873198847262245, "loss": 1.7618, "step": 1670 }, { "epoch": 47.99, "learning_rate": 0.00015786743515850142, "loss": 1.4983, "step": 1680 }, { "epoch": 47.99, "eval_loss": 3.286560297012329, "eval_runtime": 8.8248, "eval_samples_per_second": 7.139, "eval_steps_per_second": 1.813, "eval_wer": 0.3627161535217208, "step": 1680 }, { "epoch": 48.28, "learning_rate": 0.00015700288184438037, "loss": 0.6489, "step": 1690 }, { "epoch": 48.57, "learning_rate": 0.00015613832853025937, "loss": 0.4141, "step": 1700 }, { "epoch": 48.85, "learning_rate": 0.00015527377521613831, "loss": 0.3962, "step": 1710 }, { "epoch": 48.99, "eval_loss": 0.6486034989356995, "eval_runtime": 8.9267, "eval_samples_per_second": 7.057, "eval_steps_per_second": 1.792, "eval_wer": 0.36482496836777734, "step": 1715 }, { "epoch": 49.14, "learning_rate": 0.0001544092219020173, "loss": 0.4294, "step": 1720 }, { "epoch": 49.43, "learning_rate": 0.00015354466858789623, "loss": 0.3977, "step": 1730 }, { "epoch": 49.71, "learning_rate": 0.0001526801152737752, "loss": 0.398, "step": 1740 }, { "epoch": 49.99, "learning_rate": 0.00015181556195965415, "loss": 0.4422, "step": 1750 }, { "epoch": 49.99, "eval_loss": 0.8449748158454895, "eval_runtime": 8.8924, "eval_samples_per_second": 7.085, "eval_steps_per_second": 1.799, "eval_wer": 0.3778996204133277, "step": 1750 }, { "epoch": 50.28, "learning_rate": 0.0001509510086455331, "loss": 0.4246, "step": 1760 }, { "epoch": 50.57, "learning_rate": 0.0001500864553314121, "loss": 0.4109, "step": 1770 }, { "epoch": 50.85, "learning_rate": 0.00014922190201729105, "loss": 0.4198, "step": 1780 }, { "epoch": 50.99, "eval_loss": 0.7628262639045715, "eval_runtime": 8.8995, "eval_samples_per_second": 7.079, "eval_steps_per_second": 1.798, "eval_wer": 0.35638970898355127, "step": 1785 }, { "epoch": 51.14, "learning_rate": 0.00014835734870317002, "loss": 0.5414, "step": 1790 }, { "epoch": 51.43, "learning_rate": 0.000147492795389049, "loss": 0.4775, "step": 1800 }, { "epoch": 51.71, "learning_rate": 0.00014662824207492794, "loss": 0.3867, "step": 1810 }, { "epoch": 51.99, "learning_rate": 0.00014576368876080689, "loss": 0.3577, "step": 1820 }, { "epoch": 51.99, "eval_loss": 0.7552944421768188, "eval_runtime": 8.8794, "eval_samples_per_second": 7.095, "eval_steps_per_second": 1.802, "eval_wer": 0.3677773091522564, "step": 1820 }, { "epoch": 52.28, "learning_rate": 0.00014489913544668586, "loss": 0.4055, "step": 1830 }, { "epoch": 52.57, "learning_rate": 0.00014403458213256483, "loss": 0.3304, "step": 1840 }, { "epoch": 52.85, "learning_rate": 0.00014317002881844378, "loss": 0.4425, "step": 1850 }, { "epoch": 52.99, "eval_loss": 0.7565546631813049, "eval_runtime": 8.899, "eval_samples_per_second": 7.079, "eval_steps_per_second": 1.798, "eval_wer": 0.37157317587515815, "step": 1855 }, { "epoch": 53.14, "learning_rate": 0.00014230547550432275, "loss": 0.3659, "step": 1860 }, { "epoch": 53.43, "learning_rate": 0.00014144092219020173, "loss": 0.3904, "step": 1870 }, { "epoch": 53.71, "learning_rate": 0.00014057636887608067, "loss": 0.3905, "step": 1880 }, { "epoch": 53.99, "learning_rate": 0.00013971181556195965, "loss": 0.3492, "step": 1890 }, { "epoch": 53.99, "eval_loss": 0.7709900736808777, "eval_runtime": 8.8285, "eval_samples_per_second": 7.136, "eval_steps_per_second": 1.812, "eval_wer": 0.3631379164909321, "step": 1890 }, { "epoch": 54.28, "learning_rate": 0.00013884726224783862, "loss": 0.4355, "step": 1900 }, { "epoch": 54.57, "learning_rate": 0.00013798270893371757, "loss": 0.3472, "step": 1910 }, { "epoch": 54.85, "learning_rate": 0.00013711815561959654, "loss": 0.3731, "step": 1920 }, { "epoch": 54.99, "eval_loss": 0.7737426161766052, "eval_runtime": 8.9423, "eval_samples_per_second": 7.045, "eval_steps_per_second": 1.789, "eval_wer": 0.3627161535217208, "step": 1925 }, { "epoch": 55.14, "learning_rate": 0.00013625360230547548, "loss": 0.3994, "step": 1930 }, { "epoch": 55.43, "learning_rate": 0.00013538904899135446, "loss": 0.41, "step": 1940 }, { "epoch": 55.71, "learning_rate": 0.0001345244956772334, "loss": 0.3445, "step": 1950 }, { "epoch": 55.99, "learning_rate": 0.00013365994236311238, "loss": 0.3868, "step": 1960 }, { "epoch": 55.99, "eval_loss": 0.7020638585090637, "eval_runtime": 8.7931, "eval_samples_per_second": 7.165, "eval_steps_per_second": 1.82, "eval_wer": 0.35723323492197384, "step": 1960 }, { "epoch": 56.28, "learning_rate": 0.00013279538904899135, "loss": 0.5803, "step": 1970 }, { "epoch": 56.57, "learning_rate": 0.0001319308357348703, "loss": 0.4467, "step": 1980 }, { "epoch": 56.85, "learning_rate": 0.00013106628242074927, "loss": 0.3311, "step": 1990 }, { "epoch": 56.99, "eval_loss": 0.6602898240089417, "eval_runtime": 8.8334, "eval_samples_per_second": 7.132, "eval_steps_per_second": 1.811, "eval_wer": 0.3517503163222269, "step": 1995 }, { "epoch": 57.14, "learning_rate": 0.00013020172910662824, "loss": 0.3973, "step": 2000 }, { "epoch": 57.43, "learning_rate": 0.0001293371757925072, "loss": 0.3832, "step": 2010 }, { "epoch": 57.71, "learning_rate": 0.00012847262247838616, "loss": 0.3418, "step": 2020 }, { "epoch": 57.99, "learning_rate": 0.00012760806916426514, "loss": 0.3993, "step": 2030 }, { "epoch": 57.99, "eval_loss": 0.666448712348938, "eval_runtime": 8.8405, "eval_samples_per_second": 7.126, "eval_steps_per_second": 1.81, "eval_wer": 0.35807676086039647, "step": 2030 }, { "epoch": 58.28, "learning_rate": 0.00012674351585014408, "loss": 0.3879, "step": 2040 }, { "epoch": 58.57, "learning_rate": 0.00012587896253602303, "loss": 0.3436, "step": 2050 }, { "epoch": 58.85, "learning_rate": 0.000125014409221902, "loss": 0.4226, "step": 2060 }, { "epoch": 58.99, "eval_loss": 0.6813275814056396, "eval_runtime": 8.8439, "eval_samples_per_second": 7.124, "eval_steps_per_second": 1.809, "eval_wer": 0.3551244200759173, "step": 2065 }, { "epoch": 59.14, "learning_rate": 0.00012414985590778098, "loss": 0.4158, "step": 2070 }, { "epoch": 59.43, "learning_rate": 0.00012328530259365992, "loss": 0.3934, "step": 2080 }, { "epoch": 59.71, "learning_rate": 0.0001224207492795389, "loss": 0.3911, "step": 2090 }, { "epoch": 59.99, "learning_rate": 0.00012155619596541786, "loss": 0.4143, "step": 2100 }, { "epoch": 59.99, "eval_loss": 0.6566696763038635, "eval_runtime": 8.8077, "eval_samples_per_second": 7.153, "eval_steps_per_second": 1.817, "eval_wer": 0.3568114719527625, "step": 2100 }, { "epoch": 60.28, "learning_rate": 0.00012069164265129682, "loss": 0.3702, "step": 2110 }, { "epoch": 60.57, "learning_rate": 0.00011982708933717579, "loss": 0.3573, "step": 2120 }, { "epoch": 60.85, "learning_rate": 0.00011896253602305475, "loss": 0.3623, "step": 2130 }, { "epoch": 60.99, "eval_loss": 0.6568236947059631, "eval_runtime": 8.8843, "eval_samples_per_second": 7.091, "eval_steps_per_second": 1.801, "eval_wer": 0.34542387178405737, "step": 2135 }, { "epoch": 61.14, "learning_rate": 0.00011809798270893371, "loss": 0.3849, "step": 2140 }, { "epoch": 61.43, "learning_rate": 0.00011723342939481268, "loss": 0.4037, "step": 2150 }, { "epoch": 61.71, "learning_rate": 0.00011636887608069163, "loss": 0.405, "step": 2160 }, { "epoch": 61.99, "learning_rate": 0.00011550432276657059, "loss": 0.3228, "step": 2170 }, { "epoch": 61.99, "eval_loss": 0.7325943112373352, "eval_runtime": 8.898, "eval_samples_per_second": 7.08, "eval_steps_per_second": 1.798, "eval_wer": 0.3568114719527625, "step": 2170 }, { "epoch": 62.28, "learning_rate": 0.00011463976945244955, "loss": 0.363, "step": 2180 }, { "epoch": 62.57, "learning_rate": 0.00011377521613832852, "loss": 0.3833, "step": 2190 }, { "epoch": 62.85, "learning_rate": 0.00011291066282420748, "loss": 0.3204, "step": 2200 }, { "epoch": 62.99, "eval_loss": 0.7277215123176575, "eval_runtime": 8.9735, "eval_samples_per_second": 7.021, "eval_steps_per_second": 1.783, "eval_wer": 0.3639814424293547, "step": 2205 }, { "epoch": 63.14, "learning_rate": 0.00011204610951008644, "loss": 0.3488, "step": 2210 }, { "epoch": 63.43, "learning_rate": 0.00011118155619596542, "loss": 0.3561, "step": 2220 }, { "epoch": 63.71, "learning_rate": 0.00011031700288184437, "loss": 0.376, "step": 2230 }, { "epoch": 63.99, "learning_rate": 0.00010945244956772333, "loss": 0.377, "step": 2240 }, { "epoch": 63.99, "eval_loss": 0.7144931554794312, "eval_runtime": 8.9061, "eval_samples_per_second": 7.074, "eval_steps_per_second": 1.797, "eval_wer": 0.3584985238296078, "step": 2240 }, { "epoch": 64.28, "learning_rate": 0.00010858789625360231, "loss": 0.3267, "step": 2250 }, { "epoch": 64.57, "learning_rate": 0.00010772334293948127, "loss": 0.4145, "step": 2260 }, { "epoch": 64.85, "learning_rate": 0.00010685878962536021, "loss": 0.3487, "step": 2270 }, { "epoch": 64.99, "eval_loss": 0.6943061351776123, "eval_runtime": 8.8303, "eval_samples_per_second": 7.134, "eval_steps_per_second": 1.812, "eval_wer": 0.350485027414593, "step": 2275 }, { "epoch": 65.14, "learning_rate": 0.00010599423631123917, "loss": 0.3827, "step": 2280 }, { "epoch": 65.43, "learning_rate": 0.00010512968299711815, "loss": 0.3328, "step": 2290 }, { "epoch": 65.71, "learning_rate": 0.00010426512968299711, "loss": 0.3904, "step": 2300 }, { "epoch": 65.99, "learning_rate": 0.00010340057636887607, "loss": 0.343, "step": 2310 }, { "epoch": 65.99, "eval_loss": 0.74614018201828, "eval_runtime": 8.7684, "eval_samples_per_second": 7.185, "eval_steps_per_second": 1.825, "eval_wer": 0.33951919021509913, "step": 2310 }, { "epoch": 66.28, "learning_rate": 0.00010253602305475504, "loss": 0.336, "step": 2320 }, { "epoch": 66.57, "learning_rate": 0.000101671469740634, "loss": 0.3663, "step": 2330 }, { "epoch": 66.85, "learning_rate": 0.00010080691642651296, "loss": 0.3251, "step": 2340 }, { "epoch": 66.99, "eval_loss": 0.7441524863243103, "eval_runtime": 8.8232, "eval_samples_per_second": 7.14, "eval_steps_per_second": 1.813, "eval_wer": 0.35638970898355127, "step": 2345 }, { "epoch": 67.14, "learning_rate": 9.994236311239192e-05, "loss": 0.3964, "step": 2350 }, { "epoch": 67.43, "learning_rate": 9.90778097982709e-05, "loss": 0.3658, "step": 2360 }, { "epoch": 67.71, "learning_rate": 9.821325648414985e-05, "loss": 0.3515, "step": 2370 }, { "epoch": 67.99, "learning_rate": 9.73487031700288e-05, "loss": 0.3135, "step": 2380 }, { "epoch": 67.99, "eval_loss": 0.7330945134162903, "eval_runtime": 8.8098, "eval_samples_per_second": 7.151, "eval_steps_per_second": 1.816, "eval_wer": 0.3530156052298608, "step": 2380 }, { "epoch": 68.28, "learning_rate": 9.648414985590777e-05, "loss": 0.345, "step": 2390 }, { "epoch": 68.57, "learning_rate": 9.561959654178673e-05, "loss": 0.3349, "step": 2400 }, { "epoch": 68.85, "learning_rate": 9.475504322766569e-05, "loss": 0.381, "step": 2410 }, { "epoch": 68.99, "eval_loss": 0.7306443452835083, "eval_runtime": 8.8012, "eval_samples_per_second": 7.158, "eval_steps_per_second": 1.818, "eval_wer": 0.3513285533530156, "step": 2415 }, { "epoch": 69.14, "learning_rate": 9.389048991354465e-05, "loss": 0.3782, "step": 2420 }, { "epoch": 69.43, "learning_rate": 9.302593659942363e-05, "loss": 0.3198, "step": 2430 }, { "epoch": 69.71, "learning_rate": 9.216138328530259e-05, "loss": 0.3742, "step": 2440 }, { "epoch": 69.99, "learning_rate": 9.129682997118155e-05, "loss": 0.3319, "step": 2450 }, { "epoch": 69.99, "eval_loss": 0.8494887948036194, "eval_runtime": 8.8477, "eval_samples_per_second": 7.12, "eval_steps_per_second": 1.808, "eval_wer": 0.34837621256853646, "step": 2450 }, { "epoch": 70.28, "learning_rate": 9.043227665706052e-05, "loss": 0.3632, "step": 2460 }, { "epoch": 70.57, "learning_rate": 8.956772334293948e-05, "loss": 0.2967, "step": 2470 }, { "epoch": 70.85, "learning_rate": 8.870317002881844e-05, "loss": 0.3552, "step": 2480 }, { "epoch": 70.99, "eval_loss": 0.7546125054359436, "eval_runtime": 8.821, "eval_samples_per_second": 7.142, "eval_steps_per_second": 1.814, "eval_wer": 0.3551244200759173, "step": 2485 }, { "epoch": 71.14, "learning_rate": 8.783861671469738e-05, "loss": 0.3304, "step": 2490 }, { "epoch": 71.43, "learning_rate": 8.697406340057636e-05, "loss": 0.3718, "step": 2500 }, { "epoch": 71.71, "learning_rate": 8.610951008645532e-05, "loss": 0.3271, "step": 2510 }, { "epoch": 71.99, "learning_rate": 8.524495677233428e-05, "loss": 0.3292, "step": 2520 }, { "epoch": 71.99, "eval_loss": 0.7483251094818115, "eval_runtime": 8.83, "eval_samples_per_second": 7.135, "eval_steps_per_second": 1.812, "eval_wer": 0.34500210881484605, "step": 2520 }, { "epoch": 72.28, "learning_rate": 8.438040345821325e-05, "loss": 0.4341, "step": 2530 }, { "epoch": 72.57, "learning_rate": 8.351585014409221e-05, "loss": 0.3173, "step": 2540 }, { "epoch": 72.85, "learning_rate": 8.265129682997117e-05, "loss": 0.3041, "step": 2550 }, { "epoch": 72.99, "eval_loss": 0.7304584383964539, "eval_runtime": 8.8882, "eval_samples_per_second": 7.088, "eval_steps_per_second": 1.8, "eval_wer": 0.35217207929143823, "step": 2555 }, { "epoch": 73.14, "learning_rate": 8.178674351585014e-05, "loss": 0.345, "step": 2560 }, { "epoch": 73.43, "learning_rate": 8.09221902017291e-05, "loss": 0.3132, "step": 2570 }, { "epoch": 73.71, "learning_rate": 8.005763688760806e-05, "loss": 0.3838, "step": 2580 }, { "epoch": 73.99, "learning_rate": 7.919308357348704e-05, "loss": 0.3606, "step": 2590 }, { "epoch": 73.99, "eval_loss": 0.7358484864234924, "eval_runtime": 8.8826, "eval_samples_per_second": 7.093, "eval_steps_per_second": 1.801, "eval_wer": 0.34837621256853646, "step": 2590 }, { "epoch": 74.28, "learning_rate": 7.832853025936598e-05, "loss": 0.3354, "step": 2600 }, { "epoch": 74.57, "learning_rate": 7.746397694524494e-05, "loss": 0.319, "step": 2610 }, { "epoch": 74.85, "learning_rate": 7.65994236311239e-05, "loss": 0.3629, "step": 2620 }, { "epoch": 74.99, "eval_loss": 0.7708744406700134, "eval_runtime": 8.8268, "eval_samples_per_second": 7.137, "eval_steps_per_second": 1.813, "eval_wer": 0.34458034584563474, "step": 2625 }, { "epoch": 75.14, "learning_rate": 7.573487031700288e-05, "loss": 0.3404, "step": 2630 }, { "epoch": 75.43, "learning_rate": 7.487031700288184e-05, "loss": 0.3678, "step": 2640 }, { "epoch": 75.71, "learning_rate": 7.40057636887608e-05, "loss": 0.2948, "step": 2650 }, { "epoch": 75.99, "learning_rate": 7.314121037463977e-05, "loss": 0.3409, "step": 2660 }, { "epoch": 75.99, "eval_loss": 0.756772518157959, "eval_runtime": 8.8295, "eval_samples_per_second": 7.135, "eval_steps_per_second": 1.812, "eval_wer": 0.3584985238296078, "step": 2660 }, { "epoch": 76.28, "learning_rate": 7.227665706051873e-05, "loss": 0.3575, "step": 2670 }, { "epoch": 76.57, "learning_rate": 7.141210374639769e-05, "loss": 0.3195, "step": 2680 }, { "epoch": 76.85, "learning_rate": 7.054755043227665e-05, "loss": 0.3315, "step": 2690 }, { "epoch": 76.99, "eval_loss": 0.7466398477554321, "eval_runtime": 8.8563, "eval_samples_per_second": 7.114, "eval_steps_per_second": 1.807, "eval_wer": 0.3475326866301139, "step": 2695 }, { "epoch": 77.14, "learning_rate": 6.968299711815561e-05, "loss": 0.334, "step": 2700 }, { "epoch": 77.43, "learning_rate": 6.881844380403457e-05, "loss": 0.2933, "step": 2710 }, { "epoch": 77.71, "learning_rate": 6.795389048991354e-05, "loss": 0.399, "step": 2720 }, { "epoch": 77.99, "learning_rate": 6.70893371757925e-05, "loss": 0.2934, "step": 2730 }, { "epoch": 77.99, "eval_loss": 0.7351065874099731, "eval_runtime": 8.8959, "eval_samples_per_second": 7.082, "eval_steps_per_second": 1.799, "eval_wer": 0.3496415014761704, "step": 2730 }, { "epoch": 78.28, "learning_rate": 6.622478386167146e-05, "loss": 0.3346, "step": 2740 }, { "epoch": 78.57, "learning_rate": 6.536023054755042e-05, "loss": 0.3117, "step": 2750 }, { "epoch": 78.85, "learning_rate": 6.449567723342938e-05, "loss": 0.3366, "step": 2760 }, { "epoch": 78.99, "eval_loss": 0.8014491200447083, "eval_runtime": 8.8741, "eval_samples_per_second": 7.099, "eval_steps_per_second": 1.803, "eval_wer": 0.34837621256853646, "step": 2765 }, { "epoch": 79.14, "learning_rate": 6.363112391930836e-05, "loss": 0.343, "step": 2770 }, { "epoch": 79.43, "learning_rate": 6.276657060518732e-05, "loss": 0.3168, "step": 2780 }, { "epoch": 79.71, "learning_rate": 6.190201729106627e-05, "loss": 0.341, "step": 2790 }, { "epoch": 79.99, "learning_rate": 6.103746397694523e-05, "loss": 0.3176, "step": 2800 }, { "epoch": 79.99, "eval_loss": 0.8014456629753113, "eval_runtime": 8.9378, "eval_samples_per_second": 7.049, "eval_steps_per_second": 1.79, "eval_wer": 0.3420497680303669, "step": 2800 }, { "epoch": 80.28, "learning_rate": 6.01729106628242e-05, "loss": 0.3027, "step": 2810 }, { "epoch": 80.57, "learning_rate": 5.930835734870316e-05, "loss": 0.3063, "step": 2820 }, { "epoch": 80.85, "learning_rate": 5.844380403458213e-05, "loss": 0.3319, "step": 2830 }, { "epoch": 80.99, "eval_loss": 0.799625039100647, "eval_runtime": 8.8579, "eval_samples_per_second": 7.112, "eval_steps_per_second": 1.806, "eval_wer": 0.34373681990721217, "step": 2835 }, { "epoch": 81.14, "learning_rate": 5.7579250720461094e-05, "loss": 0.3286, "step": 2840 }, { "epoch": 81.43, "learning_rate": 5.6714697406340054e-05, "loss": 0.3039, "step": 2850 }, { "epoch": 81.71, "learning_rate": 5.5850144092219014e-05, "loss": 0.3028, "step": 2860 }, { "epoch": 81.99, "learning_rate": 5.4985590778097974e-05, "loss": 0.2967, "step": 2870 }, { "epoch": 81.99, "eval_loss": 0.8156365156173706, "eval_runtime": 8.901, "eval_samples_per_second": 7.078, "eval_steps_per_second": 1.798, "eval_wer": 0.34120624209194433, "step": 2870 }, { "epoch": 82.28, "learning_rate": 5.412103746397694e-05, "loss": 0.327, "step": 2880 }, { "epoch": 82.57, "learning_rate": 5.325648414985591e-05, "loss": 0.2834, "step": 2890 }, { "epoch": 82.85, "learning_rate": 5.239193083573487e-05, "loss": 0.3137, "step": 2900 }, { "epoch": 82.99, "eval_loss": 0.8024904131889343, "eval_runtime": 8.8882, "eval_samples_per_second": 7.088, "eval_steps_per_second": 1.8, "eval_wer": 0.33614508646140867, "step": 2905 }, { "epoch": 83.14, "learning_rate": 5.152737752161383e-05, "loss": 0.2981, "step": 2910 }, { "epoch": 83.43, "learning_rate": 5.066282420749279e-05, "loss": 0.2669, "step": 2920 }, { "epoch": 83.71, "learning_rate": 4.979827089337175e-05, "loss": 0.3344, "step": 2930 }, { "epoch": 83.99, "learning_rate": 4.893371757925072e-05, "loss": 0.3133, "step": 2940 }, { "epoch": 83.99, "eval_loss": 0.778435230255127, "eval_runtime": 8.8669, "eval_samples_per_second": 7.105, "eval_steps_per_second": 1.804, "eval_wer": 0.34162800506115565, "step": 2940 }, { "epoch": 84.28, "learning_rate": 4.806916426512968e-05, "loss": 0.2994, "step": 2950 }, { "epoch": 84.57, "learning_rate": 4.7204610951008646e-05, "loss": 0.2719, "step": 2960 }, { "epoch": 84.85, "learning_rate": 4.63400576368876e-05, "loss": 0.3134, "step": 2970 }, { "epoch": 84.99, "eval_loss": 0.7894186973571777, "eval_runtime": 8.8795, "eval_samples_per_second": 7.095, "eval_steps_per_second": 1.802, "eval_wer": 0.3336145086461409, "step": 2975 }, { "epoch": 85.14, "learning_rate": 4.5475504322766566e-05, "loss": 0.2941, "step": 2980 }, { "epoch": 85.43, "learning_rate": 4.4610951008645526e-05, "loss": 0.2841, "step": 2990 }, { "epoch": 85.71, "learning_rate": 4.374639769452449e-05, "loss": 0.3184, "step": 3000 }, { "epoch": 85.99, "learning_rate": 4.288184438040346e-05, "loss": 0.3216, "step": 3010 }, { "epoch": 85.99, "eval_loss": 0.8330649137496948, "eval_runtime": 8.8914, "eval_samples_per_second": 7.086, "eval_steps_per_second": 1.799, "eval_wer": 0.33951919021509913, "step": 3010 }, { "epoch": 86.28, "learning_rate": 4.201729106628241e-05, "loss": 0.3181, "step": 3020 }, { "epoch": 86.57, "learning_rate": 4.115273775216138e-05, "loss": 0.289, "step": 3030 }, { "epoch": 86.85, "learning_rate": 4.028818443804034e-05, "loss": 0.365, "step": 3040 }, { "epoch": 86.99, "eval_loss": 0.7980260252952576, "eval_runtime": 8.8324, "eval_samples_per_second": 7.133, "eval_steps_per_second": 1.812, "eval_wer": 0.3353015605229861, "step": 3045 }, { "epoch": 87.14, "learning_rate": 3.9423631123919305e-05, "loss": 0.3463, "step": 3050 }, { "epoch": 87.43, "learning_rate": 3.855907780979827e-05, "loss": 0.3385, "step": 3060 }, { "epoch": 87.71, "learning_rate": 3.769452449567723e-05, "loss": 0.2897, "step": 3070 }, { "epoch": 87.99, "learning_rate": 3.682997118155619e-05, "loss": 0.2962, "step": 3080 }, { "epoch": 87.99, "eval_loss": 0.7964635491371155, "eval_runtime": 8.8283, "eval_samples_per_second": 7.136, "eval_steps_per_second": 1.812, "eval_wer": 0.3403627161535217, "step": 3080 }, { "epoch": 88.28, "learning_rate": 3.596541786743515e-05, "loss": 0.3322, "step": 3090 }, { "epoch": 88.57, "learning_rate": 3.510086455331412e-05, "loss": 0.2878, "step": 3100 }, { "epoch": 88.85, "learning_rate": 3.4236311239193085e-05, "loss": 0.3126, "step": 3110 }, { "epoch": 88.99, "eval_loss": 0.7469554543495178, "eval_runtime": 8.8328, "eval_samples_per_second": 7.133, "eval_steps_per_second": 1.811, "eval_wer": 0.3420497680303669, "step": 3115 }, { "epoch": 89.14, "learning_rate": 3.3371757925072045e-05, "loss": 0.3497, "step": 3120 }, { "epoch": 89.43, "learning_rate": 3.2507204610951004e-05, "loss": 0.3006, "step": 3130 }, { "epoch": 89.71, "learning_rate": 3.164265129682997e-05, "loss": 0.3237, "step": 3140 }, { "epoch": 89.99, "learning_rate": 3.077809798270893e-05, "loss": 0.2843, "step": 3150 }, { "epoch": 89.99, "eval_loss": 0.7787944078445435, "eval_runtime": 8.8054, "eval_samples_per_second": 7.155, "eval_steps_per_second": 1.817, "eval_wer": 0.3403627161535217, "step": 3150 }, { "epoch": 90.28, "learning_rate": 2.9913544668587894e-05, "loss": 0.3021, "step": 3160 }, { "epoch": 90.57, "learning_rate": 2.9048991354466857e-05, "loss": 0.2765, "step": 3170 }, { "epoch": 90.85, "learning_rate": 2.8184438040345817e-05, "loss": 0.2967, "step": 3180 }, { "epoch": 90.99, "eval_loss": 0.790235698223114, "eval_runtime": 8.8387, "eval_samples_per_second": 7.128, "eval_steps_per_second": 1.81, "eval_wer": 0.3374103753690426, "step": 3185 }, { "epoch": 91.14, "learning_rate": 2.731988472622478e-05, "loss": 0.2985, "step": 3190 }, { "epoch": 91.43, "learning_rate": 2.6455331412103747e-05, "loss": 0.2949, "step": 3200 }, { "epoch": 91.71, "learning_rate": 2.5590778097982707e-05, "loss": 0.2924, "step": 3210 }, { "epoch": 91.99, "learning_rate": 2.472622478386167e-05, "loss": 0.3171, "step": 3220 }, { "epoch": 91.99, "eval_loss": 0.8021612167358398, "eval_runtime": 8.9042, "eval_samples_per_second": 7.075, "eval_steps_per_second": 1.797, "eval_wer": 0.3403627161535217, "step": 3220 }, { "epoch": 92.28, "learning_rate": 2.386167146974063e-05, "loss": 0.2985, "step": 3230 }, { "epoch": 92.57, "learning_rate": 2.2997118155619593e-05, "loss": 0.2964, "step": 3240 }, { "epoch": 92.85, "learning_rate": 2.213256484149856e-05, "loss": 0.3069, "step": 3250 }, { "epoch": 92.99, "eval_loss": 0.7999234795570374, "eval_runtime": 8.9491, "eval_samples_per_second": 7.04, "eval_steps_per_second": 1.788, "eval_wer": 0.33445803458456347, "step": 3255 }, { "epoch": 93.14, "learning_rate": 2.126801152737752e-05, "loss": 0.3008, "step": 3260 }, { "epoch": 93.43, "learning_rate": 2.0403458213256483e-05, "loss": 0.3183, "step": 3270 }, { "epoch": 93.71, "learning_rate": 1.9538904899135446e-05, "loss": 0.2784, "step": 3280 }, { "epoch": 93.99, "learning_rate": 1.867435158501441e-05, "loss": 0.3571, "step": 3290 }, { "epoch": 93.99, "eval_loss": 0.7896485328674316, "eval_runtime": 8.8589, "eval_samples_per_second": 7.111, "eval_steps_per_second": 1.806, "eval_wer": 0.3403627161535217, "step": 3290 }, { "epoch": 94.28, "learning_rate": 1.780979827089337e-05, "loss": 0.2793, "step": 3300 }, { "epoch": 94.57, "learning_rate": 1.6945244956772332e-05, "loss": 0.3032, "step": 3310 }, { "epoch": 94.85, "learning_rate": 1.6080691642651295e-05, "loss": 0.2805, "step": 3320 }, { "epoch": 94.99, "eval_loss": 0.783069908618927, "eval_runtime": 8.8967, "eval_samples_per_second": 7.081, "eval_steps_per_second": 1.798, "eval_wer": 0.3390974272458878, "step": 3325 }, { "epoch": 95.14, "learning_rate": 1.5216138328530259e-05, "loss": 0.2734, "step": 3330 }, { "epoch": 95.43, "learning_rate": 1.435158501440922e-05, "loss": 0.2819, "step": 3340 }, { "epoch": 95.71, "learning_rate": 1.3487031700288182e-05, "loss": 0.3385, "step": 3350 }, { "epoch": 95.99, "learning_rate": 1.2622478386167147e-05, "loss": 0.3099, "step": 3360 }, { "epoch": 95.99, "eval_loss": 0.790939450263977, "eval_runtime": 8.9444, "eval_samples_per_second": 7.043, "eval_steps_per_second": 1.789, "eval_wer": 0.33656684943062, "step": 3360 }, { "epoch": 96.28, "learning_rate": 1.1757925072046108e-05, "loss": 0.3293, "step": 3370 }, { "epoch": 96.57, "learning_rate": 1.0893371757925071e-05, "loss": 0.2569, "step": 3380 }, { "epoch": 96.85, "learning_rate": 1.0028818443804033e-05, "loss": 0.2868, "step": 3390 }, { "epoch": 96.99, "eval_loss": 0.7918463945388794, "eval_runtime": 9.0538, "eval_samples_per_second": 6.958, "eval_steps_per_second": 1.767, "eval_wer": 0.33951919021509913, "step": 3395 }, { "epoch": 97.14, "learning_rate": 9.164265129682996e-06, "loss": 0.299, "step": 3400 }, { "epoch": 97.43, "learning_rate": 8.29971181556196e-06, "loss": 0.2942, "step": 3410 }, { "epoch": 97.71, "learning_rate": 7.435158501440922e-06, "loss": 0.2931, "step": 3420 }, { "epoch": 97.99, "learning_rate": 6.570605187319884e-06, "loss": 0.2626, "step": 3430 }, { "epoch": 97.99, "eval_loss": 0.776630699634552, "eval_runtime": 8.8695, "eval_samples_per_second": 7.103, "eval_steps_per_second": 1.804, "eval_wer": 0.34289329396878954, "step": 3430 }, { "epoch": 98.28, "learning_rate": 5.706051873198847e-06, "loss": 0.282, "step": 3440 }, { "epoch": 98.57, "learning_rate": 4.841498559077809e-06, "loss": 0.339, "step": 3450 }, { "epoch": 98.85, "learning_rate": 3.976945244956772e-06, "loss": 0.2634, "step": 3460 }, { "epoch": 98.99, "eval_loss": 0.777043879032135, "eval_runtime": 8.8512, "eval_samples_per_second": 7.118, "eval_steps_per_second": 1.808, "eval_wer": 0.34373681990721217, "step": 3465 }, { "epoch": 99.14, "learning_rate": 3.1123919308357346e-06, "loss": 0.3132, "step": 3470 }, { "epoch": 99.43, "learning_rate": 2.247838616714697e-06, "loss": 0.2755, "step": 3480 }, { "epoch": 99.71, "learning_rate": 1.38328530259366e-06, "loss": 0.3658, "step": 3490 }, { "epoch": 99.99, "learning_rate": 5.187319884726225e-07, "loss": 0.288, "step": 3500 }, { "epoch": 99.99, "eval_loss": 0.7762196063995361, "eval_runtime": 8.8123, "eval_samples_per_second": 7.149, "eval_steps_per_second": 1.816, "eval_wer": 0.34289329396878954, "step": 3500 }, { "epoch": 99.99, "step": 3500, "total_flos": 5.739635222597828e+18, "train_loss": 0.4800486422266279, "train_runtime": 12025.3803, "train_samples_per_second": 4.682, "train_steps_per_second": 0.291 } ], "max_steps": 3500, "num_train_epochs": 100, "total_flos": 5.739635222597828e+18, "trial_name": null, "trial_params": null }