{ "best_metric": 21.3524811218986, "best_model_checkpoint": "tamil_models/whisper-medium-ta_alldata_multigpu/checkpoint-8100", "epoch": 2.9779411764705883, "global_step": 8100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.8562520089993833e-06, "loss": 1.5919, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.805947323355403e-06, "loss": 0.7788, "step": 20 }, { "epoch": 0.01, "learning_rate": 5.712504017998767e-06, "loss": 0.5415, "step": 30 }, { "epoch": 0.01, "learning_rate": 6.309637647321797e-06, "loss": 0.403, "step": 40 }, { "epoch": 0.02, "learning_rate": 6.755642637711422e-06, "loss": 0.3221, "step": 50 }, { "epoch": 0.02, "learning_rate": 7.111770744693953e-06, "loss": 0.2477, "step": 60 }, { "epoch": 0.03, "learning_rate": 7.4082398170603955e-06, "loss": 0.2032, "step": 70 }, { "epoch": 0.03, "learning_rate": 7.662199332354785e-06, "loss": 0.1913, "step": 80 }, { "epoch": 0.03, "learning_rate": 7.884325004773166e-06, "loss": 0.1803, "step": 90 }, { "epoch": 0.04, "learning_rate": 8.0817159331721e-06, "loss": 0.1755, "step": 100 }, { "epoch": 0.04, "eval_loss": 0.20166015625, "eval_runtime": 284.2904, "eval_samples_per_second": 9.744, "eval_steps_per_second": 0.077, "eval_wer": 39.73840345199569, "step": 100 }, { "epoch": 0.04, "learning_rate": 8.259332961677815e-06, "loss": 0.1628, "step": 110 }, { "epoch": 0.04, "learning_rate": 8.420779589879445e-06, "loss": 0.1622, "step": 120 }, { "epoch": 0.05, "learning_rate": 8.56875602699815e-06, "loss": 0.1555, "step": 130 }, { "epoch": 0.05, "learning_rate": 8.705337952067442e-06, "loss": 0.1506, "step": 140 }, { "epoch": 0.06, "learning_rate": 8.832155400481049e-06, "loss": 0.1489, "step": 150 }, { "epoch": 0.06, "learning_rate": 8.95051185954238e-06, "loss": 0.1453, "step": 160 }, { "epoch": 0.06, "learning_rate": 9.061466059049972e-06, "loss": 0.143, "step": 170 }, { "epoch": 0.07, "learning_rate": 9.16588965632118e-06, "loss": 0.1402, "step": 180 }, { "epoch": 0.07, "learning_rate": 9.264508868515998e-06, "loss": 0.1386, "step": 190 }, { "epoch": 0.07, "learning_rate": 9.357935131416414e-06, "loss": 0.1374, "step": 200 }, { "epoch": 0.07, "eval_loss": 0.1690673828125, "eval_runtime": 294.2297, "eval_samples_per_second": 9.414, "eval_steps_per_second": 0.075, "eval_wer": 36.370685005393746, "step": 200 }, { "epoch": 0.08, "learning_rate": 9.446688082452126e-06, "loss": 0.1323, "step": 210 }, { "epoch": 0.08, "learning_rate": 9.531213064471803e-06, "loss": 0.1343, "step": 220 }, { "epoch": 0.08, "learning_rate": 9.611894646710806e-06, "loss": 0.1317, "step": 230 }, { "epoch": 0.09, "learning_rate": 9.689067203278456e-06, "loss": 0.125, "step": 240 }, { "epoch": 0.09, "learning_rate": 9.76302328564421e-06, "loss": 0.1323, "step": 250 }, { "epoch": 0.1, "learning_rate": 9.834020319129184e-06, "loss": 0.1286, "step": 260 }, { "epoch": 0.1, "learning_rate": 9.902286010551205e-06, "loss": 0.1225, "step": 270 }, { "epoch": 0.1, "learning_rate": 9.968022753693337e-06, "loss": 0.1282, "step": 280 }, { "epoch": 0.11, "learning_rate": 9.998556998557e-06, "loss": 0.1244, "step": 290 }, { "epoch": 0.11, "learning_rate": 9.994949494949497e-06, "loss": 0.1231, "step": 300 }, { "epoch": 0.11, "eval_loss": 0.1517333984375, "eval_runtime": 240.2543, "eval_samples_per_second": 11.529, "eval_steps_per_second": 0.092, "eval_wer": 33.087243797195256, "step": 300 }, { "epoch": 0.11, "learning_rate": 9.991341991341992e-06, "loss": 0.1244, "step": 310 }, { "epoch": 0.12, "learning_rate": 9.987734487734489e-06, "loss": 0.1163, "step": 320 }, { "epoch": 0.12, "learning_rate": 9.984126984126986e-06, "loss": 0.1194, "step": 330 }, { "epoch": 0.12, "learning_rate": 9.980519480519481e-06, "loss": 0.1191, "step": 340 }, { "epoch": 0.13, "learning_rate": 9.976911976911978e-06, "loss": 0.115, "step": 350 }, { "epoch": 0.13, "learning_rate": 9.973304473304473e-06, "loss": 0.1119, "step": 360 }, { "epoch": 0.14, "learning_rate": 9.96969696969697e-06, "loss": 0.1171, "step": 370 }, { "epoch": 0.14, "learning_rate": 9.966089466089467e-06, "loss": 0.1159, "step": 380 }, { "epoch": 0.14, "learning_rate": 9.962481962481964e-06, "loss": 0.1134, "step": 390 }, { "epoch": 0.15, "learning_rate": 9.95887445887446e-06, "loss": 0.116, "step": 400 }, { "epoch": 0.15, "eval_loss": 0.1417236328125, "eval_runtime": 254.6878, "eval_samples_per_second": 10.876, "eval_steps_per_second": 0.086, "eval_wer": 31.47586299892125, "step": 400 }, { "epoch": 0.15, "learning_rate": 9.955266955266956e-06, "loss": 0.1126, "step": 410 }, { "epoch": 0.15, "learning_rate": 9.951659451659453e-06, "loss": 0.1114, "step": 420 }, { "epoch": 0.16, "learning_rate": 9.94805194805195e-06, "loss": 0.1116, "step": 430 }, { "epoch": 0.16, "learning_rate": 9.944444444444445e-06, "loss": 0.111, "step": 440 }, { "epoch": 0.17, "learning_rate": 9.940836940836942e-06, "loss": 0.1122, "step": 450 }, { "epoch": 0.17, "learning_rate": 9.937229437229437e-06, "loss": 0.1081, "step": 460 }, { "epoch": 0.17, "learning_rate": 9.933621933621934e-06, "loss": 0.1116, "step": 470 }, { "epoch": 0.18, "learning_rate": 9.93001443001443e-06, "loss": 0.1067, "step": 480 }, { "epoch": 0.18, "learning_rate": 9.926406926406928e-06, "loss": 0.1075, "step": 490 }, { "epoch": 0.18, "learning_rate": 9.922799422799425e-06, "loss": 0.1073, "step": 500 }, { "epoch": 0.18, "eval_loss": 0.134765625, "eval_runtime": 864.1715, "eval_samples_per_second": 3.205, "eval_steps_per_second": 0.025, "eval_wer": 30.818500539374327, "step": 500 }, { "epoch": 0.19, "learning_rate": 9.91919191919192e-06, "loss": 0.1085, "step": 510 }, { "epoch": 0.19, "learning_rate": 9.915584415584417e-06, "loss": 0.1079, "step": 520 }, { "epoch": 0.19, "learning_rate": 9.911976911976914e-06, "loss": 0.1068, "step": 530 }, { "epoch": 0.2, "learning_rate": 9.908369408369409e-06, "loss": 0.1096, "step": 540 }, { "epoch": 0.2, "learning_rate": 9.904761904761906e-06, "loss": 0.104, "step": 550 }, { "epoch": 0.21, "learning_rate": 9.901154401154402e-06, "loss": 0.1055, "step": 560 }, { "epoch": 0.21, "learning_rate": 9.897546897546898e-06, "loss": 0.1014, "step": 570 }, { "epoch": 0.21, "learning_rate": 9.893939393939395e-06, "loss": 0.1086, "step": 580 }, { "epoch": 0.22, "learning_rate": 9.890331890331891e-06, "loss": 0.106, "step": 590 }, { "epoch": 0.22, "learning_rate": 9.886724386724388e-06, "loss": 0.1037, "step": 600 }, { "epoch": 0.22, "eval_loss": 0.1295166015625, "eval_runtime": 258.1898, "eval_samples_per_second": 10.729, "eval_steps_per_second": 0.085, "eval_wer": 29.72289644012945, "step": 600 }, { "epoch": 0.22, "learning_rate": 9.883116883116885e-06, "loss": 0.1027, "step": 610 }, { "epoch": 0.23, "learning_rate": 9.87950937950938e-06, "loss": 0.1015, "step": 620 }, { "epoch": 0.23, "learning_rate": 9.875901875901877e-06, "loss": 0.1002, "step": 630 }, { "epoch": 0.24, "learning_rate": 9.872294372294373e-06, "loss": 0.1027, "step": 640 }, { "epoch": 0.24, "learning_rate": 9.86868686868687e-06, "loss": 0.102, "step": 650 }, { "epoch": 0.24, "learning_rate": 9.865079365079366e-06, "loss": 0.0981, "step": 660 }, { "epoch": 0.25, "learning_rate": 9.861471861471862e-06, "loss": 0.0992, "step": 670 }, { "epoch": 0.25, "learning_rate": 9.857864357864358e-06, "loss": 0.1041, "step": 680 }, { "epoch": 0.25, "learning_rate": 9.854256854256855e-06, "loss": 0.0979, "step": 690 }, { "epoch": 0.26, "learning_rate": 9.850649350649352e-06, "loss": 0.0997, "step": 700 }, { "epoch": 0.26, "eval_loss": 0.1251220703125, "eval_runtime": 253.4804, "eval_samples_per_second": 10.928, "eval_steps_per_second": 0.087, "eval_wer": 29.26442826321467, "step": 700 }, { "epoch": 0.26, "learning_rate": 9.847041847041849e-06, "loss": 0.0963, "step": 710 }, { "epoch": 0.26, "learning_rate": 9.843434343434344e-06, "loss": 0.0971, "step": 720 }, { "epoch": 0.27, "learning_rate": 9.839826839826841e-06, "loss": 0.0997, "step": 730 }, { "epoch": 0.27, "learning_rate": 9.836219336219336e-06, "loss": 0.0979, "step": 740 }, { "epoch": 0.28, "learning_rate": 9.832611832611833e-06, "loss": 0.099, "step": 750 }, { "epoch": 0.28, "learning_rate": 9.82900432900433e-06, "loss": 0.0974, "step": 760 }, { "epoch": 0.28, "learning_rate": 9.825396825396825e-06, "loss": 0.0955, "step": 770 }, { "epoch": 0.29, "learning_rate": 9.821789321789322e-06, "loss": 0.0953, "step": 780 }, { "epoch": 0.29, "learning_rate": 9.81818181818182e-06, "loss": 0.0965, "step": 790 }, { "epoch": 0.29, "learning_rate": 9.814574314574316e-06, "loss": 0.0982, "step": 800 }, { "epoch": 0.29, "eval_loss": 0.12176513671875, "eval_runtime": 245.3351, "eval_samples_per_second": 11.291, "eval_steps_per_second": 0.09, "eval_wer": 28.56324163969795, "step": 800 }, { "epoch": 0.3, "learning_rate": 9.810966810966811e-06, "loss": 0.0949, "step": 810 }, { "epoch": 0.3, "learning_rate": 9.807359307359308e-06, "loss": 0.0963, "step": 820 }, { "epoch": 0.31, "learning_rate": 9.803751803751805e-06, "loss": 0.0973, "step": 830 }, { "epoch": 0.31, "learning_rate": 9.8001443001443e-06, "loss": 0.0965, "step": 840 }, { "epoch": 0.31, "learning_rate": 9.796536796536797e-06, "loss": 0.0938, "step": 850 }, { "epoch": 0.32, "learning_rate": 9.792929292929294e-06, "loss": 0.0939, "step": 860 }, { "epoch": 0.32, "learning_rate": 9.789321789321791e-06, "loss": 0.0963, "step": 870 }, { "epoch": 0.32, "learning_rate": 9.785714285714286e-06, "loss": 0.0978, "step": 880 }, { "epoch": 0.33, "learning_rate": 9.782106782106783e-06, "loss": 0.0923, "step": 890 }, { "epoch": 0.33, "learning_rate": 9.77849927849928e-06, "loss": 0.098, "step": 900 }, { "epoch": 0.33, "eval_loss": 0.1192626953125, "eval_runtime": 520.2114, "eval_samples_per_second": 5.325, "eval_steps_per_second": 0.042, "eval_wer": 28.030609492988134, "step": 900 }, { "epoch": 0.33, "learning_rate": 9.774891774891775e-06, "loss": 0.0914, "step": 910 }, { "epoch": 0.34, "learning_rate": 9.771284271284272e-06, "loss": 0.0944, "step": 920 }, { "epoch": 0.34, "learning_rate": 9.767676767676767e-06, "loss": 0.0966, "step": 930 }, { "epoch": 0.35, "learning_rate": 9.764069264069264e-06, "loss": 0.0903, "step": 940 }, { "epoch": 0.35, "learning_rate": 9.760461760461761e-06, "loss": 0.0952, "step": 950 }, { "epoch": 0.35, "learning_rate": 9.756854256854258e-06, "loss": 0.0913, "step": 960 }, { "epoch": 0.36, "learning_rate": 9.753246753246755e-06, "loss": 0.0943, "step": 970 }, { "epoch": 0.36, "learning_rate": 9.74963924963925e-06, "loss": 0.0939, "step": 980 }, { "epoch": 0.36, "learning_rate": 9.746031746031747e-06, "loss": 0.0946, "step": 990 }, { "epoch": 0.37, "learning_rate": 9.742424242424244e-06, "loss": 0.0941, "step": 1000 }, { "epoch": 0.37, "eval_loss": 0.11590576171875, "eval_runtime": 241.569, "eval_samples_per_second": 11.467, "eval_steps_per_second": 0.091, "eval_wer": 27.4608953613808, "step": 1000 }, { "epoch": 0.37, "learning_rate": 9.738816738816739e-06, "loss": 0.0898, "step": 1010 }, { "epoch": 0.38, "learning_rate": 9.735209235209236e-06, "loss": 0.0899, "step": 1020 }, { "epoch": 0.38, "learning_rate": 9.731601731601731e-06, "loss": 0.093, "step": 1030 }, { "epoch": 0.38, "learning_rate": 9.727994227994228e-06, "loss": 0.0897, "step": 1040 }, { "epoch": 0.39, "learning_rate": 9.724386724386725e-06, "loss": 0.095, "step": 1050 }, { "epoch": 0.39, "learning_rate": 9.720779220779222e-06, "loss": 0.0965, "step": 1060 }, { "epoch": 0.39, "learning_rate": 9.717171717171719e-06, "loss": 0.0911, "step": 1070 }, { "epoch": 0.4, "learning_rate": 9.713564213564214e-06, "loss": 0.0903, "step": 1080 }, { "epoch": 0.4, "learning_rate": 9.70995670995671e-06, "loss": 0.0952, "step": 1090 }, { "epoch": 0.4, "learning_rate": 9.706349206349208e-06, "loss": 0.0941, "step": 1100 }, { "epoch": 0.4, "eval_loss": 0.11358642578125, "eval_runtime": 255.0901, "eval_samples_per_second": 10.859, "eval_steps_per_second": 0.086, "eval_wer": 26.965345199568503, "step": 1100 }, { "epoch": 0.41, "learning_rate": 9.702741702741703e-06, "loss": 0.0898, "step": 1110 }, { "epoch": 0.41, "learning_rate": 9.6991341991342e-06, "loss": 0.0917, "step": 1120 }, { "epoch": 0.42, "learning_rate": 9.695526695526695e-06, "loss": 0.0895, "step": 1130 }, { "epoch": 0.42, "learning_rate": 9.691919191919192e-06, "loss": 0.0915, "step": 1140 }, { "epoch": 0.42, "learning_rate": 9.688311688311689e-06, "loss": 0.089, "step": 1150 }, { "epoch": 0.43, "learning_rate": 9.684704184704186e-06, "loss": 0.0887, "step": 1160 }, { "epoch": 0.43, "learning_rate": 9.681096681096683e-06, "loss": 0.0909, "step": 1170 }, { "epoch": 0.43, "learning_rate": 9.67748917748918e-06, "loss": 0.0897, "step": 1180 }, { "epoch": 0.44, "learning_rate": 9.673881673881675e-06, "loss": 0.0917, "step": 1190 }, { "epoch": 0.44, "learning_rate": 9.670274170274172e-06, "loss": 0.0868, "step": 1200 }, { "epoch": 0.44, "eval_loss": 0.11199951171875, "eval_runtime": 247.8554, "eval_samples_per_second": 11.176, "eval_steps_per_second": 0.089, "eval_wer": 27.164239482200646, "step": 1200 }, { "epoch": 0.44, "learning_rate": 9.666666666666667e-06, "loss": 0.091, "step": 1210 }, { "epoch": 0.45, "learning_rate": 9.663059163059164e-06, "loss": 0.0903, "step": 1220 }, { "epoch": 0.45, "learning_rate": 9.65945165945166e-06, "loss": 0.0856, "step": 1230 }, { "epoch": 0.46, "learning_rate": 9.655844155844156e-06, "loss": 0.0907, "step": 1240 }, { "epoch": 0.46, "learning_rate": 9.652236652236653e-06, "loss": 0.0886, "step": 1250 }, { "epoch": 0.46, "learning_rate": 9.64862914862915e-06, "loss": 0.0841, "step": 1260 }, { "epoch": 0.47, "learning_rate": 9.645021645021646e-06, "loss": 0.0855, "step": 1270 }, { "epoch": 0.47, "learning_rate": 9.641414141414143e-06, "loss": 0.0843, "step": 1280 }, { "epoch": 0.47, "learning_rate": 9.637806637806638e-06, "loss": 0.0901, "step": 1290 }, { "epoch": 0.48, "learning_rate": 9.634199134199135e-06, "loss": 0.0899, "step": 1300 }, { "epoch": 0.48, "eval_loss": 0.10968017578125, "eval_runtime": 245.0559, "eval_samples_per_second": 11.304, "eval_steps_per_second": 0.09, "eval_wer": 26.581040992448756, "step": 1300 }, { "epoch": 0.48, "learning_rate": 9.63059163059163e-06, "loss": 0.0857, "step": 1310 }, { "epoch": 0.49, "learning_rate": 9.626984126984127e-06, "loss": 0.0879, "step": 1320 }, { "epoch": 0.49, "learning_rate": 9.623376623376624e-06, "loss": 0.0909, "step": 1330 }, { "epoch": 0.49, "learning_rate": 9.61976911976912e-06, "loss": 0.0859, "step": 1340 }, { "epoch": 0.5, "learning_rate": 9.616161616161616e-06, "loss": 0.0844, "step": 1350 }, { "epoch": 0.5, "learning_rate": 9.612554112554113e-06, "loss": 0.0878, "step": 1360 }, { "epoch": 0.5, "learning_rate": 9.60894660894661e-06, "loss": 0.0867, "step": 1370 }, { "epoch": 0.51, "learning_rate": 9.605339105339107e-06, "loss": 0.0885, "step": 1380 }, { "epoch": 0.51, "learning_rate": 9.601731601731602e-06, "loss": 0.0856, "step": 1390 }, { "epoch": 0.51, "learning_rate": 9.5981240981241e-06, "loss": 0.0888, "step": 1400 }, { "epoch": 0.51, "eval_loss": 0.1080322265625, "eval_runtime": 243.3367, "eval_samples_per_second": 11.383, "eval_steps_per_second": 0.09, "eval_wer": 26.007955771305287, "step": 1400 }, { "epoch": 0.52, "learning_rate": 9.594516594516594e-06, "loss": 0.0872, "step": 1410 }, { "epoch": 0.52, "learning_rate": 9.590909090909091e-06, "loss": 0.0861, "step": 1420 }, { "epoch": 0.53, "learning_rate": 9.587301587301588e-06, "loss": 0.0852, "step": 1430 }, { "epoch": 0.53, "learning_rate": 9.583694083694083e-06, "loss": 0.0866, "step": 1440 }, { "epoch": 0.53, "learning_rate": 9.58008658008658e-06, "loss": 0.0842, "step": 1450 }, { "epoch": 0.54, "learning_rate": 9.576479076479077e-06, "loss": 0.0861, "step": 1460 }, { "epoch": 0.54, "learning_rate": 9.572871572871574e-06, "loss": 0.086, "step": 1470 }, { "epoch": 0.54, "learning_rate": 9.569264069264071e-06, "loss": 0.0831, "step": 1480 }, { "epoch": 0.55, "learning_rate": 9.565656565656566e-06, "loss": 0.0858, "step": 1490 }, { "epoch": 0.55, "learning_rate": 9.562049062049063e-06, "loss": 0.0871, "step": 1500 }, { "epoch": 0.55, "eval_loss": 0.10699462890625, "eval_runtime": 701.1447, "eval_samples_per_second": 3.951, "eval_steps_per_second": 0.031, "eval_wer": 25.974244875943903, "step": 1500 }, { "epoch": 0.56, "learning_rate": 9.558441558441558e-06, "loss": 0.0805, "step": 1510 }, { "epoch": 0.56, "learning_rate": 9.554834054834055e-06, "loss": 0.0857, "step": 1520 }, { "epoch": 0.56, "learning_rate": 9.551226551226552e-06, "loss": 0.0855, "step": 1530 }, { "epoch": 0.57, "learning_rate": 9.547619047619049e-06, "loss": 0.0855, "step": 1540 }, { "epoch": 0.57, "learning_rate": 9.544011544011544e-06, "loss": 0.0872, "step": 1550 }, { "epoch": 0.57, "learning_rate": 9.540404040404041e-06, "loss": 0.0866, "step": 1560 }, { "epoch": 0.58, "learning_rate": 9.536796536796538e-06, "loss": 0.0852, "step": 1570 }, { "epoch": 0.58, "learning_rate": 9.533189033189035e-06, "loss": 0.0878, "step": 1580 }, { "epoch": 0.58, "learning_rate": 9.52958152958153e-06, "loss": 0.0855, "step": 1590 }, { "epoch": 0.59, "learning_rate": 9.525974025974027e-06, "loss": 0.0848, "step": 1600 }, { "epoch": 0.59, "eval_loss": 0.10595703125, "eval_runtime": 254.3389, "eval_samples_per_second": 10.891, "eval_steps_per_second": 0.086, "eval_wer": 25.589940668824163, "step": 1600 }, { "epoch": 0.59, "learning_rate": 9.522366522366522e-06, "loss": 0.0893, "step": 1610 }, { "epoch": 0.6, "learning_rate": 9.518759018759019e-06, "loss": 0.0822, "step": 1620 }, { "epoch": 0.6, "learning_rate": 9.515151515151516e-06, "loss": 0.0813, "step": 1630 }, { "epoch": 0.6, "learning_rate": 9.511544011544013e-06, "loss": 0.0844, "step": 1640 }, { "epoch": 0.61, "learning_rate": 9.507936507936508e-06, "loss": 0.0823, "step": 1650 }, { "epoch": 0.61, "learning_rate": 9.504329004329005e-06, "loss": 0.0868, "step": 1660 }, { "epoch": 0.61, "learning_rate": 9.500721500721502e-06, "loss": 0.0851, "step": 1670 }, { "epoch": 0.62, "learning_rate": 9.497113997113999e-06, "loss": 0.0818, "step": 1680 }, { "epoch": 0.62, "learning_rate": 9.493506493506494e-06, "loss": 0.0835, "step": 1690 }, { "epoch": 0.62, "learning_rate": 9.48989898989899e-06, "loss": 0.0835, "step": 1700 }, { "epoch": 0.62, "eval_loss": 0.1044921875, "eval_runtime": 1330.4645, "eval_samples_per_second": 2.082, "eval_steps_per_second": 0.017, "eval_wer": 25.573085221143472, "step": 1700 }, { "epoch": 0.63, "learning_rate": 9.486291486291486e-06, "loss": 0.0841, "step": 1710 }, { "epoch": 0.63, "learning_rate": 9.482683982683983e-06, "loss": 0.0858, "step": 1720 }, { "epoch": 0.64, "learning_rate": 9.47907647907648e-06, "loss": 0.0845, "step": 1730 }, { "epoch": 0.64, "learning_rate": 9.475468975468977e-06, "loss": 0.0854, "step": 1740 }, { "epoch": 0.64, "learning_rate": 9.471861471861472e-06, "loss": 0.083, "step": 1750 }, { "epoch": 0.65, "learning_rate": 9.468253968253969e-06, "loss": 0.0834, "step": 1760 }, { "epoch": 0.65, "learning_rate": 9.464646464646466e-06, "loss": 0.0811, "step": 1770 }, { "epoch": 0.65, "learning_rate": 9.461038961038963e-06, "loss": 0.0882, "step": 1780 }, { "epoch": 0.66, "learning_rate": 9.457431457431458e-06, "loss": 0.0829, "step": 1790 }, { "epoch": 0.66, "learning_rate": 9.453823953823955e-06, "loss": 0.08, "step": 1800 }, { "epoch": 0.66, "eval_loss": 0.10369873046875, "eval_runtime": 244.3089, "eval_samples_per_second": 11.338, "eval_steps_per_second": 0.09, "eval_wer": 25.13484358144552, "step": 1800 }, { "epoch": 0.67, "learning_rate": 9.45021645021645e-06, "loss": 0.0853, "step": 1810 }, { "epoch": 0.67, "learning_rate": 9.446608946608947e-06, "loss": 0.0826, "step": 1820 }, { "epoch": 0.67, "learning_rate": 9.443001443001444e-06, "loss": 0.0828, "step": 1830 }, { "epoch": 0.68, "learning_rate": 9.43939393939394e-06, "loss": 0.0797, "step": 1840 }, { "epoch": 0.68, "learning_rate": 9.435786435786437e-06, "loss": 0.0814, "step": 1850 }, { "epoch": 0.68, "learning_rate": 9.432178932178933e-06, "loss": 0.0827, "step": 1860 }, { "epoch": 0.69, "learning_rate": 9.42857142857143e-06, "loss": 0.0789, "step": 1870 }, { "epoch": 0.69, "learning_rate": 9.424963924963926e-06, "loss": 0.0838, "step": 1880 }, { "epoch": 0.69, "learning_rate": 9.421356421356422e-06, "loss": 0.0853, "step": 1890 }, { "epoch": 0.7, "learning_rate": 9.417748917748919e-06, "loss": 0.0819, "step": 1900 }, { "epoch": 0.7, "eval_loss": 0.10162353515625, "eval_runtime": 705.4564, "eval_samples_per_second": 3.927, "eval_steps_per_second": 0.031, "eval_wer": 24.976402373247033, "step": 1900 }, { "epoch": 0.7, "learning_rate": 9.414141414141414e-06, "loss": 0.0821, "step": 1910 }, { "epoch": 0.71, "learning_rate": 9.41053391053391e-06, "loss": 0.0813, "step": 1920 }, { "epoch": 0.71, "learning_rate": 9.406926406926408e-06, "loss": 0.0814, "step": 1930 }, { "epoch": 0.71, "learning_rate": 9.403318903318904e-06, "loss": 0.0856, "step": 1940 }, { "epoch": 0.72, "learning_rate": 9.399711399711401e-06, "loss": 0.0842, "step": 1950 }, { "epoch": 0.72, "learning_rate": 9.396103896103896e-06, "loss": 0.0806, "step": 1960 }, { "epoch": 0.72, "learning_rate": 9.392496392496393e-06, "loss": 0.0794, "step": 1970 }, { "epoch": 0.73, "learning_rate": 9.38888888888889e-06, "loss": 0.084, "step": 1980 }, { "epoch": 0.73, "learning_rate": 9.385281385281385e-06, "loss": 0.0853, "step": 1990 }, { "epoch": 0.74, "learning_rate": 9.381673881673882e-06, "loss": 0.0801, "step": 2000 }, { "epoch": 0.74, "eval_loss": 0.100830078125, "eval_runtime": 693.4811, "eval_samples_per_second": 3.994, "eval_steps_per_second": 0.032, "eval_wer": 24.96628910463862, "step": 2000 }, { "epoch": 0.74, "learning_rate": 9.378066378066378e-06, "loss": 0.0807, "step": 2010 }, { "epoch": 0.74, "learning_rate": 9.374458874458874e-06, "loss": 0.0807, "step": 2020 }, { "epoch": 0.75, "learning_rate": 9.370851370851371e-06, "loss": 0.08, "step": 2030 }, { "epoch": 0.75, "learning_rate": 9.367243867243868e-06, "loss": 0.0829, "step": 2040 }, { "epoch": 0.75, "learning_rate": 9.363636363636365e-06, "loss": 0.0805, "step": 2050 }, { "epoch": 0.76, "learning_rate": 9.36002886002886e-06, "loss": 0.0807, "step": 2060 }, { "epoch": 0.76, "learning_rate": 9.356421356421357e-06, "loss": 0.0826, "step": 2070 }, { "epoch": 0.76, "learning_rate": 9.352813852813854e-06, "loss": 0.0796, "step": 2080 }, { "epoch": 0.77, "learning_rate": 9.34920634920635e-06, "loss": 0.0786, "step": 2090 }, { "epoch": 0.77, "learning_rate": 9.345598845598846e-06, "loss": 0.0793, "step": 2100 }, { "epoch": 0.77, "eval_loss": 0.09967041015625, "eval_runtime": 817.1155, "eval_samples_per_second": 3.39, "eval_steps_per_second": 0.027, "eval_wer": 24.504449838187703, "step": 2100 }, { "epoch": 0.78, "learning_rate": 9.341991341991343e-06, "loss": 0.0808, "step": 2110 }, { "epoch": 0.78, "learning_rate": 9.338383838383838e-06, "loss": 0.0814, "step": 2120 }, { "epoch": 0.78, "learning_rate": 9.334776334776335e-06, "loss": 0.0832, "step": 2130 }, { "epoch": 0.79, "learning_rate": 9.331168831168832e-06, "loss": 0.0812, "step": 2140 }, { "epoch": 0.79, "learning_rate": 9.327561327561329e-06, "loss": 0.0776, "step": 2150 }, { "epoch": 0.79, "learning_rate": 9.323953823953826e-06, "loss": 0.0807, "step": 2160 }, { "epoch": 0.8, "learning_rate": 9.320346320346321e-06, "loss": 0.0831, "step": 2170 }, { "epoch": 0.8, "learning_rate": 9.316738816738818e-06, "loss": 0.0795, "step": 2180 }, { "epoch": 0.81, "learning_rate": 9.313131313131313e-06, "loss": 0.079, "step": 2190 }, { "epoch": 0.81, "learning_rate": 9.30952380952381e-06, "loss": 0.081, "step": 2200 }, { "epoch": 0.81, "eval_loss": 0.09979248046875, "eval_runtime": 515.1218, "eval_samples_per_second": 5.377, "eval_steps_per_second": 0.043, "eval_wer": 24.514563106796118, "step": 2200 }, { "epoch": 0.81, "learning_rate": 9.305916305916307e-06, "loss": 0.083, "step": 2210 }, { "epoch": 0.82, "learning_rate": 9.302308802308802e-06, "loss": 0.0823, "step": 2220 }, { "epoch": 0.82, "learning_rate": 9.298701298701299e-06, "loss": 0.0809, "step": 2230 }, { "epoch": 0.82, "learning_rate": 9.295093795093796e-06, "loss": 0.0795, "step": 2240 }, { "epoch": 0.83, "learning_rate": 9.291486291486293e-06, "loss": 0.0786, "step": 2250 }, { "epoch": 0.83, "learning_rate": 9.28787878787879e-06, "loss": 0.0838, "step": 2260 }, { "epoch": 0.83, "learning_rate": 9.284271284271285e-06, "loss": 0.0788, "step": 2270 }, { "epoch": 0.84, "learning_rate": 9.280663780663782e-06, "loss": 0.0788, "step": 2280 }, { "epoch": 0.84, "learning_rate": 9.277056277056277e-06, "loss": 0.0765, "step": 2290 }, { "epoch": 0.85, "learning_rate": 9.273448773448774e-06, "loss": 0.079, "step": 2300 }, { "epoch": 0.85, "eval_loss": 0.0982666015625, "eval_runtime": 568.3193, "eval_samples_per_second": 4.874, "eval_steps_per_second": 0.039, "eval_wer": 24.733683926645092, "step": 2300 }, { "epoch": 0.85, "learning_rate": 9.26984126984127e-06, "loss": 0.0772, "step": 2310 }, { "epoch": 0.85, "learning_rate": 9.266233766233766e-06, "loss": 0.0808, "step": 2320 }, { "epoch": 0.86, "learning_rate": 9.262626262626263e-06, "loss": 0.0786, "step": 2330 }, { "epoch": 0.86, "learning_rate": 9.25901875901876e-06, "loss": 0.0787, "step": 2340 }, { "epoch": 0.86, "learning_rate": 9.255411255411257e-06, "loss": 0.0798, "step": 2350 }, { "epoch": 0.87, "learning_rate": 9.251803751803754e-06, "loss": 0.0808, "step": 2360 }, { "epoch": 0.87, "learning_rate": 9.248196248196249e-06, "loss": 0.0753, "step": 2370 }, { "epoch": 0.88, "learning_rate": 9.244588744588746e-06, "loss": 0.0801, "step": 2380 }, { "epoch": 0.88, "learning_rate": 9.240981240981241e-06, "loss": 0.0794, "step": 2390 }, { "epoch": 0.88, "learning_rate": 9.237373737373738e-06, "loss": 0.0758, "step": 2400 }, { "epoch": 0.88, "eval_loss": 0.09796142578125, "eval_runtime": 249.0453, "eval_samples_per_second": 11.122, "eval_steps_per_second": 0.088, "eval_wer": 24.747168284789645, "step": 2400 }, { "epoch": 0.89, "learning_rate": 9.233766233766235e-06, "loss": 0.0798, "step": 2410 }, { "epoch": 0.89, "learning_rate": 9.230158730158732e-06, "loss": 0.082, "step": 2420 }, { "epoch": 0.89, "learning_rate": 9.226551226551227e-06, "loss": 0.0796, "step": 2430 }, { "epoch": 0.9, "learning_rate": 9.222943722943724e-06, "loss": 0.0787, "step": 2440 }, { "epoch": 0.9, "learning_rate": 9.21933621933622e-06, "loss": 0.075, "step": 2450 }, { "epoch": 0.9, "learning_rate": 9.215728715728717e-06, "loss": 0.0778, "step": 2460 }, { "epoch": 0.91, "learning_rate": 9.212121212121213e-06, "loss": 0.077, "step": 2470 }, { "epoch": 0.91, "learning_rate": 9.20851370851371e-06, "loss": 0.0747, "step": 2480 }, { "epoch": 0.92, "learning_rate": 9.204906204906205e-06, "loss": 0.075, "step": 2490 }, { "epoch": 0.92, "learning_rate": 9.201298701298702e-06, "loss": 0.0806, "step": 2500 }, { "epoch": 0.92, "eval_loss": 0.0966796875, "eval_runtime": 487.9447, "eval_samples_per_second": 5.677, "eval_steps_per_second": 0.045, "eval_wer": 23.77629449838188, "step": 2500 }, { "epoch": 0.92, "learning_rate": 9.197691197691199e-06, "loss": 0.0786, "step": 2510 }, { "epoch": 0.93, "learning_rate": 9.194083694083695e-06, "loss": 0.0789, "step": 2520 }, { "epoch": 0.93, "learning_rate": 9.19047619047619e-06, "loss": 0.0765, "step": 2530 }, { "epoch": 0.93, "learning_rate": 9.186868686868688e-06, "loss": 0.0769, "step": 2540 }, { "epoch": 0.94, "learning_rate": 9.183261183261184e-06, "loss": 0.0773, "step": 2550 }, { "epoch": 0.94, "learning_rate": 9.179653679653681e-06, "loss": 0.076, "step": 2560 }, { "epoch": 0.94, "learning_rate": 9.176046176046177e-06, "loss": 0.0751, "step": 2570 }, { "epoch": 0.95, "learning_rate": 9.172438672438673e-06, "loss": 0.0764, "step": 2580 }, { "epoch": 0.95, "learning_rate": 9.168831168831169e-06, "loss": 0.0767, "step": 2590 }, { "epoch": 0.96, "learning_rate": 9.165223665223666e-06, "loss": 0.077, "step": 2600 }, { "epoch": 0.96, "eval_loss": 0.095703125, "eval_runtime": 770.8972, "eval_samples_per_second": 3.593, "eval_steps_per_second": 0.029, "eval_wer": 24.029126213592235, "step": 2600 }, { "epoch": 0.96, "learning_rate": 9.161616161616162e-06, "loss": 0.0777, "step": 2610 }, { "epoch": 0.96, "learning_rate": 9.15800865800866e-06, "loss": 0.0773, "step": 2620 }, { "epoch": 0.97, "learning_rate": 9.154401154401155e-06, "loss": 0.0801, "step": 2630 }, { "epoch": 0.97, "learning_rate": 9.150793650793651e-06, "loss": 0.0799, "step": 2640 }, { "epoch": 0.97, "learning_rate": 9.147186147186148e-06, "loss": 0.0778, "step": 2650 }, { "epoch": 0.98, "learning_rate": 9.143578643578645e-06, "loss": 0.0757, "step": 2660 }, { "epoch": 0.98, "learning_rate": 9.13997113997114e-06, "loss": 0.0797, "step": 2670 }, { "epoch": 0.99, "learning_rate": 9.136363636363637e-06, "loss": 0.0756, "step": 2680 }, { "epoch": 0.99, "learning_rate": 9.132756132756132e-06, "loss": 0.0759, "step": 2690 }, { "epoch": 0.99, "learning_rate": 9.12914862914863e-06, "loss": 0.0797, "step": 2700 }, { "epoch": 0.99, "eval_loss": 0.09466552734375, "eval_runtime": 554.2007, "eval_samples_per_second": 4.998, "eval_steps_per_second": 0.04, "eval_wer": 23.678532901833872, "step": 2700 }, { "epoch": 1.0, "learning_rate": 9.125541125541126e-06, "loss": 0.0775, "step": 2710 }, { "epoch": 1.0, "learning_rate": 9.121933621933623e-06, "loss": 0.0774, "step": 2720 }, { "epoch": 1.0, "learning_rate": 9.11832611832612e-06, "loss": 0.0691, "step": 2730 }, { "epoch": 1.01, "learning_rate": 9.114718614718615e-06, "loss": 0.067, "step": 2740 }, { "epoch": 1.01, "learning_rate": 9.111111111111112e-06, "loss": 0.0684, "step": 2750 }, { "epoch": 1.01, "learning_rate": 9.107503607503609e-06, "loss": 0.0688, "step": 2760 }, { "epoch": 1.02, "learning_rate": 9.103896103896104e-06, "loss": 0.0667, "step": 2770 }, { "epoch": 1.02, "learning_rate": 9.100288600288601e-06, "loss": 0.0664, "step": 2780 }, { "epoch": 1.03, "learning_rate": 9.096681096681096e-06, "loss": 0.0654, "step": 2790 }, { "epoch": 1.03, "learning_rate": 9.093073593073593e-06, "loss": 0.0697, "step": 2800 }, { "epoch": 1.03, "eval_loss": 0.09423828125, "eval_runtime": 378.3675, "eval_samples_per_second": 7.321, "eval_steps_per_second": 0.058, "eval_wer": 23.570658036677454, "step": 2800 }, { "epoch": 1.03, "learning_rate": 9.08946608946609e-06, "loss": 0.0675, "step": 2810 }, { "epoch": 1.04, "learning_rate": 9.085858585858587e-06, "loss": 0.0684, "step": 2820 }, { "epoch": 1.04, "learning_rate": 9.082251082251084e-06, "loss": 0.0697, "step": 2830 }, { "epoch": 1.04, "learning_rate": 9.078643578643579e-06, "loss": 0.0678, "step": 2840 }, { "epoch": 1.05, "learning_rate": 9.075036075036076e-06, "loss": 0.0673, "step": 2850 }, { "epoch": 1.05, "learning_rate": 9.071428571428573e-06, "loss": 0.0691, "step": 2860 }, { "epoch": 1.06, "learning_rate": 9.067821067821068e-06, "loss": 0.0657, "step": 2870 }, { "epoch": 1.06, "learning_rate": 9.064213564213565e-06, "loss": 0.067, "step": 2880 }, { "epoch": 1.06, "learning_rate": 9.06060606060606e-06, "loss": 0.0695, "step": 2890 }, { "epoch": 1.07, "learning_rate": 9.056998556998557e-06, "loss": 0.0685, "step": 2900 }, { "epoch": 1.07, "eval_loss": 0.09454345703125, "eval_runtime": 252.9239, "eval_samples_per_second": 10.952, "eval_steps_per_second": 0.087, "eval_wer": 23.5571736785329, "step": 2900 }, { "epoch": 1.07, "learning_rate": 9.053391053391054e-06, "loss": 0.0688, "step": 2910 }, { "epoch": 1.07, "learning_rate": 9.049783549783551e-06, "loss": 0.0657, "step": 2920 }, { "epoch": 1.08, "learning_rate": 9.046176046176048e-06, "loss": 0.068, "step": 2930 }, { "epoch": 1.08, "learning_rate": 9.042568542568543e-06, "loss": 0.0692, "step": 2940 }, { "epoch": 1.08, "learning_rate": 9.03896103896104e-06, "loss": 0.0681, "step": 2950 }, { "epoch": 1.09, "learning_rate": 9.035353535353537e-06, "loss": 0.0699, "step": 2960 }, { "epoch": 1.09, "learning_rate": 9.031746031746032e-06, "loss": 0.0725, "step": 2970 }, { "epoch": 1.1, "learning_rate": 9.028138528138529e-06, "loss": 0.0665, "step": 2980 }, { "epoch": 1.1, "learning_rate": 9.024531024531026e-06, "loss": 0.0666, "step": 2990 }, { "epoch": 1.1, "learning_rate": 9.020923520923521e-06, "loss": 0.0685, "step": 3000 }, { "epoch": 1.1, "eval_loss": 0.094970703125, "eval_runtime": 448.4694, "eval_samples_per_second": 6.177, "eval_steps_per_second": 0.049, "eval_wer": 23.722357065803667, "step": 3000 }, { "epoch": 1.11, "learning_rate": 9.017316017316018e-06, "loss": 0.0692, "step": 3010 }, { "epoch": 1.11, "learning_rate": 9.013708513708515e-06, "loss": 0.067, "step": 3020 }, { "epoch": 1.11, "learning_rate": 9.010101010101012e-06, "loss": 0.0689, "step": 3030 }, { "epoch": 1.12, "learning_rate": 9.006493506493509e-06, "loss": 0.0692, "step": 3040 }, { "epoch": 1.12, "learning_rate": 9.002886002886004e-06, "loss": 0.0696, "step": 3050 }, { "epoch": 1.12, "learning_rate": 8.9992784992785e-06, "loss": 0.0676, "step": 3060 }, { "epoch": 1.13, "learning_rate": 8.995670995670996e-06, "loss": 0.0696, "step": 3070 }, { "epoch": 1.13, "learning_rate": 8.992063492063493e-06, "loss": 0.0702, "step": 3080 }, { "epoch": 1.14, "learning_rate": 8.98845598845599e-06, "loss": 0.0669, "step": 3090 }, { "epoch": 1.14, "learning_rate": 8.984848484848485e-06, "loss": 0.0669, "step": 3100 }, { "epoch": 1.14, "eval_loss": 0.0938720703125, "eval_runtime": 367.7769, "eval_samples_per_second": 7.532, "eval_steps_per_second": 0.06, "eval_wer": 23.59762675296656, "step": 3100 }, { "epoch": 1.14, "learning_rate": 8.981240981240982e-06, "loss": 0.0677, "step": 3110 }, { "epoch": 1.15, "learning_rate": 8.977633477633479e-06, "loss": 0.0684, "step": 3120 }, { "epoch": 1.15, "learning_rate": 8.974025974025975e-06, "loss": 0.0677, "step": 3130 }, { "epoch": 1.15, "learning_rate": 8.970418470418472e-06, "loss": 0.0658, "step": 3140 }, { "epoch": 1.16, "learning_rate": 8.966810966810968e-06, "loss": 0.0689, "step": 3150 }, { "epoch": 1.16, "learning_rate": 8.963203463203464e-06, "loss": 0.0653, "step": 3160 }, { "epoch": 1.17, "learning_rate": 8.95959595959596e-06, "loss": 0.0668, "step": 3170 }, { "epoch": 1.17, "learning_rate": 8.955988455988457e-06, "loss": 0.066, "step": 3180 }, { "epoch": 1.17, "learning_rate": 8.952380952380953e-06, "loss": 0.0668, "step": 3190 }, { "epoch": 1.18, "learning_rate": 8.948773448773449e-06, "loss": 0.0678, "step": 3200 }, { "epoch": 1.18, "eval_loss": 0.09356689453125, "eval_runtime": 288.5179, "eval_samples_per_second": 9.601, "eval_steps_per_second": 0.076, "eval_wer": 23.415587918015103, "step": 3200 }, { "epoch": 1.18, "learning_rate": 8.945165945165946e-06, "loss": 0.0679, "step": 3210 }, { "epoch": 1.18, "learning_rate": 8.941558441558442e-06, "loss": 0.0677, "step": 3220 }, { "epoch": 1.19, "learning_rate": 8.93795093795094e-06, "loss": 0.0694, "step": 3230 }, { "epoch": 1.19, "learning_rate": 8.934343434343436e-06, "loss": 0.0678, "step": 3240 }, { "epoch": 1.19, "learning_rate": 8.930735930735931e-06, "loss": 0.0682, "step": 3250 }, { "epoch": 1.2, "learning_rate": 8.927128427128428e-06, "loss": 0.0674, "step": 3260 }, { "epoch": 1.2, "learning_rate": 8.923520923520924e-06, "loss": 0.0688, "step": 3270 }, { "epoch": 1.21, "learning_rate": 8.91991341991342e-06, "loss": 0.0684, "step": 3280 }, { "epoch": 1.21, "learning_rate": 8.916305916305917e-06, "loss": 0.0659, "step": 3290 }, { "epoch": 1.21, "learning_rate": 8.912698412698414e-06, "loss": 0.0692, "step": 3300 }, { "epoch": 1.21, "eval_loss": 0.09393310546875, "eval_runtime": 478.9048, "eval_samples_per_second": 5.784, "eval_steps_per_second": 0.046, "eval_wer": 23.358279395900755, "step": 3300 }, { "epoch": 1.22, "learning_rate": 8.90909090909091e-06, "loss": 0.0698, "step": 3310 }, { "epoch": 1.22, "learning_rate": 8.905483405483406e-06, "loss": 0.069, "step": 3320 }, { "epoch": 1.22, "learning_rate": 8.901875901875903e-06, "loss": 0.0668, "step": 3330 }, { "epoch": 1.23, "learning_rate": 8.8982683982684e-06, "loss": 0.0663, "step": 3340 }, { "epoch": 1.23, "learning_rate": 8.894660894660895e-06, "loss": 0.0676, "step": 3350 }, { "epoch": 1.24, "learning_rate": 8.891053391053392e-06, "loss": 0.0657, "step": 3360 }, { "epoch": 1.24, "learning_rate": 8.887445887445887e-06, "loss": 0.0667, "step": 3370 }, { "epoch": 1.24, "learning_rate": 8.883838383838384e-06, "loss": 0.0657, "step": 3380 }, { "epoch": 1.25, "learning_rate": 8.880230880230881e-06, "loss": 0.0647, "step": 3390 }, { "epoch": 1.25, "learning_rate": 8.876623376623378e-06, "loss": 0.0664, "step": 3400 }, { "epoch": 1.25, "eval_loss": 0.09320068359375, "eval_runtime": 804.4129, "eval_samples_per_second": 3.444, "eval_steps_per_second": 0.027, "eval_wer": 23.52683387270766, "step": 3400 }, { "epoch": 1.25, "learning_rate": 8.873015873015873e-06, "loss": 0.0668, "step": 3410 }, { "epoch": 1.26, "learning_rate": 8.86940836940837e-06, "loss": 0.0662, "step": 3420 }, { "epoch": 1.26, "learning_rate": 8.865800865800867e-06, "loss": 0.0656, "step": 3430 }, { "epoch": 1.26, "learning_rate": 8.862193362193364e-06, "loss": 0.0668, "step": 3440 }, { "epoch": 1.27, "learning_rate": 8.85858585858586e-06, "loss": 0.0688, "step": 3450 }, { "epoch": 1.27, "learning_rate": 8.854978354978356e-06, "loss": 0.0667, "step": 3460 }, { "epoch": 1.28, "learning_rate": 8.851370851370851e-06, "loss": 0.0703, "step": 3470 }, { "epoch": 1.28, "learning_rate": 8.847763347763348e-06, "loss": 0.0703, "step": 3480 }, { "epoch": 1.28, "learning_rate": 8.844155844155845e-06, "loss": 0.0678, "step": 3490 }, { "epoch": 1.29, "learning_rate": 8.840548340548342e-06, "loss": 0.0684, "step": 3500 }, { "epoch": 1.29, "eval_loss": 0.09197998046875, "eval_runtime": 577.6791, "eval_samples_per_second": 4.795, "eval_steps_per_second": 0.038, "eval_wer": 23.43918554476807, "step": 3500 }, { "epoch": 1.29, "learning_rate": 8.836940836940837e-06, "loss": 0.0679, "step": 3510 }, { "epoch": 1.29, "learning_rate": 8.833333333333334e-06, "loss": 0.068, "step": 3520 }, { "epoch": 1.3, "learning_rate": 8.829725829725831e-06, "loss": 0.0673, "step": 3530 }, { "epoch": 1.3, "learning_rate": 8.826118326118328e-06, "loss": 0.0646, "step": 3540 }, { "epoch": 1.31, "learning_rate": 8.822510822510823e-06, "loss": 0.0667, "step": 3550 }, { "epoch": 1.31, "learning_rate": 8.81890331890332e-06, "loss": 0.0664, "step": 3560 }, { "epoch": 1.31, "learning_rate": 8.815295815295815e-06, "loss": 0.0669, "step": 3570 }, { "epoch": 1.32, "learning_rate": 8.811688311688312e-06, "loss": 0.0681, "step": 3580 }, { "epoch": 1.32, "learning_rate": 8.808080808080809e-06, "loss": 0.0689, "step": 3590 }, { "epoch": 1.32, "learning_rate": 8.804473304473306e-06, "loss": 0.0681, "step": 3600 }, { "epoch": 1.32, "eval_loss": 0.0921630859375, "eval_runtime": 316.0563, "eval_samples_per_second": 8.764, "eval_steps_per_second": 0.07, "eval_wer": 22.82564724919094, "step": 3600 }, { "epoch": 1.33, "learning_rate": 8.800865800865803e-06, "loss": 0.0683, "step": 3610 }, { "epoch": 1.33, "learning_rate": 8.797258297258298e-06, "loss": 0.0662, "step": 3620 }, { "epoch": 1.33, "learning_rate": 8.793650793650795e-06, "loss": 0.0705, "step": 3630 }, { "epoch": 1.34, "learning_rate": 8.79004329004329e-06, "loss": 0.0668, "step": 3640 }, { "epoch": 1.34, "learning_rate": 8.786435786435787e-06, "loss": 0.0649, "step": 3650 }, { "epoch": 1.35, "learning_rate": 8.782828282828284e-06, "loss": 0.0676, "step": 3660 }, { "epoch": 1.35, "learning_rate": 8.779220779220779e-06, "loss": 0.067, "step": 3670 }, { "epoch": 1.35, "learning_rate": 8.775613275613276e-06, "loss": 0.068, "step": 3680 }, { "epoch": 1.36, "learning_rate": 8.772005772005773e-06, "loss": 0.0656, "step": 3690 }, { "epoch": 1.36, "learning_rate": 8.76839826839827e-06, "loss": 0.0668, "step": 3700 }, { "epoch": 1.36, "eval_loss": 0.09124755859375, "eval_runtime": 443.766, "eval_samples_per_second": 6.242, "eval_steps_per_second": 0.05, "eval_wer": 22.866100323624593, "step": 3700 }, { "epoch": 1.36, "learning_rate": 8.764790764790767e-06, "loss": 0.0656, "step": 3710 }, { "epoch": 1.37, "learning_rate": 8.761183261183262e-06, "loss": 0.0672, "step": 3720 }, { "epoch": 1.37, "learning_rate": 8.757575757575759e-06, "loss": 0.0691, "step": 3730 }, { "epoch": 1.38, "learning_rate": 8.753968253968254e-06, "loss": 0.0658, "step": 3740 }, { "epoch": 1.38, "learning_rate": 8.75036075036075e-06, "loss": 0.067, "step": 3750 }, { "epoch": 1.38, "learning_rate": 8.746753246753248e-06, "loss": 0.0659, "step": 3760 }, { "epoch": 1.39, "learning_rate": 8.743145743145743e-06, "loss": 0.0644, "step": 3770 }, { "epoch": 1.39, "learning_rate": 8.73953823953824e-06, "loss": 0.0664, "step": 3780 }, { "epoch": 1.39, "learning_rate": 8.735930735930737e-06, "loss": 0.0674, "step": 3790 }, { "epoch": 1.4, "learning_rate": 8.732323232323234e-06, "loss": 0.0678, "step": 3800 }, { "epoch": 1.4, "eval_loss": 0.09130859375, "eval_runtime": 455.9937, "eval_samples_per_second": 6.075, "eval_steps_per_second": 0.048, "eval_wer": 23.580771305285868, "step": 3800 }, { "epoch": 1.4, "learning_rate": 8.72871572871573e-06, "loss": 0.0658, "step": 3810 }, { "epoch": 1.4, "learning_rate": 8.725108225108226e-06, "loss": 0.0646, "step": 3820 }, { "epoch": 1.41, "learning_rate": 8.721500721500722e-06, "loss": 0.0649, "step": 3830 }, { "epoch": 1.41, "learning_rate": 8.717893217893218e-06, "loss": 0.067, "step": 3840 }, { "epoch": 1.42, "learning_rate": 8.714285714285715e-06, "loss": 0.0656, "step": 3850 }, { "epoch": 1.42, "learning_rate": 8.710678210678211e-06, "loss": 0.0685, "step": 3860 }, { "epoch": 1.42, "learning_rate": 8.707070707070707e-06, "loss": 0.0652, "step": 3870 }, { "epoch": 1.43, "learning_rate": 8.703463203463204e-06, "loss": 0.0686, "step": 3880 }, { "epoch": 1.43, "learning_rate": 8.6998556998557e-06, "loss": 0.0669, "step": 3890 }, { "epoch": 1.43, "learning_rate": 8.696248196248197e-06, "loss": 0.064, "step": 3900 }, { "epoch": 1.43, "eval_loss": 0.09088134765625, "eval_runtime": 323.8053, "eval_samples_per_second": 8.555, "eval_steps_per_second": 0.068, "eval_wer": 23.15601402373247, "step": 3900 }, { "epoch": 1.44, "learning_rate": 8.692640692640694e-06, "loss": 0.0667, "step": 3910 }, { "epoch": 1.44, "learning_rate": 8.68903318903319e-06, "loss": 0.0635, "step": 3920 }, { "epoch": 1.44, "learning_rate": 8.685425685425686e-06, "loss": 0.067, "step": 3930 }, { "epoch": 1.45, "learning_rate": 8.681818181818182e-06, "loss": 0.0644, "step": 3940 }, { "epoch": 1.45, "learning_rate": 8.678210678210678e-06, "loss": 0.0629, "step": 3950 }, { "epoch": 1.46, "learning_rate": 8.674603174603175e-06, "loss": 0.0667, "step": 3960 }, { "epoch": 1.46, "learning_rate": 8.670995670995672e-06, "loss": 0.0664, "step": 3970 }, { "epoch": 1.46, "learning_rate": 8.667388167388167e-06, "loss": 0.0658, "step": 3980 }, { "epoch": 1.47, "learning_rate": 8.663780663780664e-06, "loss": 0.0682, "step": 3990 }, { "epoch": 1.47, "learning_rate": 8.660173160173161e-06, "loss": 0.0663, "step": 4000 }, { "epoch": 1.47, "eval_loss": 0.09112548828125, "eval_runtime": 454.1676, "eval_samples_per_second": 6.099, "eval_steps_per_second": 0.048, "eval_wer": 23.826860841423947, "step": 4000 }, { "epoch": 1.47, "learning_rate": 8.657287157287158e-06, "loss": 0.0664, "step": 4010 }, { "epoch": 1.48, "learning_rate": 8.653679653679655e-06, "loss": 0.0658, "step": 4020 }, { "epoch": 1.48, "learning_rate": 8.650072150072152e-06, "loss": 0.0678, "step": 4030 }, { "epoch": 1.49, "learning_rate": 8.646464646464647e-06, "loss": 0.0699, "step": 4040 }, { "epoch": 1.49, "learning_rate": 8.642857142857144e-06, "loss": 0.0662, "step": 4050 }, { "epoch": 1.49, "learning_rate": 8.639249639249639e-06, "loss": 0.0677, "step": 4060 }, { "epoch": 1.5, "learning_rate": 8.635642135642136e-06, "loss": 0.0651, "step": 4070 }, { "epoch": 1.5, "learning_rate": 8.632034632034633e-06, "loss": 0.0681, "step": 4080 }, { "epoch": 1.5, "learning_rate": 8.62842712842713e-06, "loss": 0.0658, "step": 4090 }, { "epoch": 1.51, "learning_rate": 8.624819624819626e-06, "loss": 0.0657, "step": 4100 }, { "epoch": 1.51, "eval_loss": 0.0904541015625, "eval_runtime": 483.7028, "eval_samples_per_second": 5.727, "eval_steps_per_second": 0.045, "eval_wer": 22.562702265372167, "step": 4100 }, { "epoch": 1.51, "learning_rate": 8.621212121212122e-06, "loss": 0.0668, "step": 4110 }, { "epoch": 1.51, "learning_rate": 8.617604617604618e-06, "loss": 0.066, "step": 4120 }, { "epoch": 1.52, "learning_rate": 8.613997113997115e-06, "loss": 0.0646, "step": 4130 }, { "epoch": 1.52, "learning_rate": 8.61038961038961e-06, "loss": 0.0657, "step": 4140 }, { "epoch": 1.53, "learning_rate": 8.606782106782107e-06, "loss": 0.0658, "step": 4150 }, { "epoch": 1.53, "learning_rate": 8.603174603174604e-06, "loss": 0.0666, "step": 4160 }, { "epoch": 1.53, "learning_rate": 8.5995670995671e-06, "loss": 0.0677, "step": 4170 }, { "epoch": 1.54, "learning_rate": 8.595959595959596e-06, "loss": 0.0667, "step": 4180 }, { "epoch": 1.54, "learning_rate": 8.592352092352093e-06, "loss": 0.0671, "step": 4190 }, { "epoch": 1.54, "learning_rate": 8.58874458874459e-06, "loss": 0.0679, "step": 4200 }, { "epoch": 1.54, "eval_loss": 0.090576171875, "eval_runtime": 527.5145, "eval_samples_per_second": 5.251, "eval_steps_per_second": 0.042, "eval_wer": 22.808791801510246, "step": 4200 }, { "epoch": 1.55, "learning_rate": 8.585137085137087e-06, "loss": 0.0684, "step": 4210 }, { "epoch": 1.55, "learning_rate": 8.581529581529582e-06, "loss": 0.0677, "step": 4220 }, { "epoch": 1.56, "learning_rate": 8.57792207792208e-06, "loss": 0.0646, "step": 4230 }, { "epoch": 1.56, "learning_rate": 8.574314574314574e-06, "loss": 0.068, "step": 4240 }, { "epoch": 1.56, "learning_rate": 8.570707070707071e-06, "loss": 0.0668, "step": 4250 }, { "epoch": 1.57, "learning_rate": 8.567099567099568e-06, "loss": 0.0656, "step": 4260 }, { "epoch": 1.57, "learning_rate": 8.563492063492063e-06, "loss": 0.0654, "step": 4270 }, { "epoch": 1.57, "learning_rate": 8.55988455988456e-06, "loss": 0.0648, "step": 4280 }, { "epoch": 1.58, "learning_rate": 8.556277056277057e-06, "loss": 0.067, "step": 4290 }, { "epoch": 1.58, "learning_rate": 8.552669552669554e-06, "loss": 0.0675, "step": 4300 }, { "epoch": 1.58, "eval_loss": 0.090087890625, "eval_runtime": 250.2835, "eval_samples_per_second": 11.067, "eval_steps_per_second": 0.088, "eval_wer": 22.515507011866234, "step": 4300 }, { "epoch": 1.58, "learning_rate": 8.549062049062051e-06, "loss": 0.0657, "step": 4310 }, { "epoch": 1.59, "learning_rate": 8.545454545454546e-06, "loss": 0.0648, "step": 4320 }, { "epoch": 1.59, "learning_rate": 8.541847041847043e-06, "loss": 0.0679, "step": 4330 }, { "epoch": 1.6, "learning_rate": 8.538239538239538e-06, "loss": 0.0682, "step": 4340 }, { "epoch": 1.6, "learning_rate": 8.534632034632035e-06, "loss": 0.0693, "step": 4350 }, { "epoch": 1.6, "learning_rate": 8.531024531024532e-06, "loss": 0.0637, "step": 4360 }, { "epoch": 1.61, "learning_rate": 8.527417027417027e-06, "loss": 0.068, "step": 4370 }, { "epoch": 1.61, "learning_rate": 8.523809523809524e-06, "loss": 0.0688, "step": 4380 }, { "epoch": 1.61, "learning_rate": 8.520202020202021e-06, "loss": 0.0643, "step": 4390 }, { "epoch": 1.62, "learning_rate": 8.516594516594518e-06, "loss": 0.0673, "step": 4400 }, { "epoch": 1.62, "eval_loss": 0.0897216796875, "eval_runtime": 333.8313, "eval_samples_per_second": 8.298, "eval_steps_per_second": 0.066, "eval_wer": 22.43797195253506, "step": 4400 }, { "epoch": 1.62, "learning_rate": 8.512987012987015e-06, "loss": 0.0634, "step": 4410 }, { "epoch": 1.62, "learning_rate": 8.50937950937951e-06, "loss": 0.0683, "step": 4420 }, { "epoch": 1.63, "learning_rate": 8.505772005772007e-06, "loss": 0.0673, "step": 4430 }, { "epoch": 1.63, "learning_rate": 8.502164502164502e-06, "loss": 0.0677, "step": 4440 }, { "epoch": 1.64, "learning_rate": 8.498556998556999e-06, "loss": 0.0676, "step": 4450 }, { "epoch": 1.64, "learning_rate": 8.494949494949496e-06, "loss": 0.0686, "step": 4460 }, { "epoch": 1.64, "learning_rate": 8.491341991341993e-06, "loss": 0.0678, "step": 4470 }, { "epoch": 1.65, "learning_rate": 8.487734487734488e-06, "loss": 0.0663, "step": 4480 }, { "epoch": 1.65, "learning_rate": 8.484126984126985e-06, "loss": 0.0659, "step": 4490 }, { "epoch": 1.65, "learning_rate": 8.480519480519482e-06, "loss": 0.0639, "step": 4500 }, { "epoch": 1.65, "eval_loss": 0.08935546875, "eval_runtime": 1011.4705, "eval_samples_per_second": 2.739, "eval_steps_per_second": 0.022, "eval_wer": 22.55596008629989, "step": 4500 }, { "epoch": 1.66, "learning_rate": 8.476911976911979e-06, "loss": 0.0664, "step": 4510 }, { "epoch": 1.66, "learning_rate": 8.473304473304474e-06, "loss": 0.0662, "step": 4520 }, { "epoch": 1.67, "learning_rate": 8.46969696969697e-06, "loss": 0.067, "step": 4530 }, { "epoch": 1.67, "learning_rate": 8.466089466089466e-06, "loss": 0.0661, "step": 4540 }, { "epoch": 1.67, "learning_rate": 8.462481962481963e-06, "loss": 0.0655, "step": 4550 }, { "epoch": 1.68, "learning_rate": 8.45887445887446e-06, "loss": 0.0655, "step": 4560 }, { "epoch": 1.68, "learning_rate": 8.455266955266957e-06, "loss": 0.0641, "step": 4570 }, { "epoch": 1.68, "learning_rate": 8.451659451659452e-06, "loss": 0.0658, "step": 4580 }, { "epoch": 1.69, "learning_rate": 8.448051948051949e-06, "loss": 0.0646, "step": 4590 }, { "epoch": 1.69, "learning_rate": 8.444444444444446e-06, "loss": 0.0675, "step": 4600 }, { "epoch": 1.69, "eval_loss": 0.088623046875, "eval_runtime": 296.69, "eval_samples_per_second": 9.336, "eval_steps_per_second": 0.074, "eval_wer": 22.431229773462782, "step": 4600 }, { "epoch": 1.69, "learning_rate": 8.440836940836943e-06, "loss": 0.0646, "step": 4610 }, { "epoch": 1.7, "learning_rate": 8.437229437229438e-06, "loss": 0.0634, "step": 4620 }, { "epoch": 1.7, "learning_rate": 8.433621933621935e-06, "loss": 0.0665, "step": 4630 }, { "epoch": 1.71, "learning_rate": 8.43001443001443e-06, "loss": 0.0659, "step": 4640 }, { "epoch": 1.71, "learning_rate": 8.426406926406927e-06, "loss": 0.0661, "step": 4650 }, { "epoch": 1.71, "learning_rate": 8.422799422799424e-06, "loss": 0.0665, "step": 4660 }, { "epoch": 1.72, "learning_rate": 8.41919191919192e-06, "loss": 0.0633, "step": 4670 }, { "epoch": 1.72, "learning_rate": 8.415584415584416e-06, "loss": 0.0644, "step": 4680 }, { "epoch": 1.72, "learning_rate": 8.411976911976913e-06, "loss": 0.0632, "step": 4690 }, { "epoch": 1.73, "learning_rate": 8.40836940836941e-06, "loss": 0.0621, "step": 4700 }, { "epoch": 1.73, "eval_loss": 0.08856201171875, "eval_runtime": 349.513, "eval_samples_per_second": 7.925, "eval_steps_per_second": 0.063, "eval_wer": 22.518878101402372, "step": 4700 }, { "epoch": 1.73, "learning_rate": 8.404761904761905e-06, "loss": 0.0652, "step": 4710 }, { "epoch": 1.74, "learning_rate": 8.401154401154402e-06, "loss": 0.0629, "step": 4720 }, { "epoch": 1.74, "learning_rate": 8.397546897546899e-06, "loss": 0.0657, "step": 4730 }, { "epoch": 1.74, "learning_rate": 8.393939393939394e-06, "loss": 0.0645, "step": 4740 }, { "epoch": 1.75, "learning_rate": 8.39033189033189e-06, "loss": 0.0667, "step": 4750 }, { "epoch": 1.75, "learning_rate": 8.386724386724387e-06, "loss": 0.0634, "step": 4760 }, { "epoch": 1.75, "learning_rate": 8.383116883116884e-06, "loss": 0.0666, "step": 4770 }, { "epoch": 1.76, "learning_rate": 8.379509379509381e-06, "loss": 0.0663, "step": 4780 }, { "epoch": 1.76, "learning_rate": 8.375901875901876e-06, "loss": 0.0686, "step": 4790 }, { "epoch": 1.76, "learning_rate": 8.372294372294373e-06, "loss": 0.0658, "step": 4800 }, { "epoch": 1.76, "eval_loss": 0.08856201171875, "eval_runtime": 691.7141, "eval_samples_per_second": 4.005, "eval_steps_per_second": 0.032, "eval_wer": 22.249190938511326, "step": 4800 }, { "epoch": 1.77, "learning_rate": 8.368686868686869e-06, "loss": 0.0649, "step": 4810 }, { "epoch": 1.77, "learning_rate": 8.365079365079365e-06, "loss": 0.064, "step": 4820 }, { "epoch": 1.78, "learning_rate": 8.361471861471862e-06, "loss": 0.0639, "step": 4830 }, { "epoch": 1.78, "learning_rate": 8.357864357864358e-06, "loss": 0.0658, "step": 4840 }, { "epoch": 1.78, "learning_rate": 8.354256854256854e-06, "loss": 0.0712, "step": 4850 }, { "epoch": 1.79, "learning_rate": 8.350649350649351e-06, "loss": 0.0664, "step": 4860 }, { "epoch": 1.79, "learning_rate": 8.347041847041848e-06, "loss": 0.0611, "step": 4870 }, { "epoch": 1.79, "learning_rate": 8.343434343434345e-06, "loss": 0.0675, "step": 4880 }, { "epoch": 1.8, "learning_rate": 8.33982683982684e-06, "loss": 0.0659, "step": 4890 }, { "epoch": 1.8, "learning_rate": 8.336219336219337e-06, "loss": 0.0654, "step": 4900 }, { "epoch": 1.8, "eval_loss": 0.08856201171875, "eval_runtime": 583.1687, "eval_samples_per_second": 4.75, "eval_steps_per_second": 0.038, "eval_wer": 22.090749730312837, "step": 4900 }, { "epoch": 1.81, "learning_rate": 8.332611832611832e-06, "loss": 0.0679, "step": 4910 }, { "epoch": 1.81, "learning_rate": 8.32900432900433e-06, "loss": 0.0667, "step": 4920 }, { "epoch": 1.81, "learning_rate": 8.325396825396826e-06, "loss": 0.0661, "step": 4930 }, { "epoch": 1.82, "learning_rate": 8.321789321789321e-06, "loss": 0.0635, "step": 4940 }, { "epoch": 1.82, "learning_rate": 8.318181818181818e-06, "loss": 0.0645, "step": 4950 }, { "epoch": 1.82, "learning_rate": 8.314574314574315e-06, "loss": 0.0663, "step": 4960 }, { "epoch": 1.83, "learning_rate": 8.310966810966812e-06, "loss": 0.0646, "step": 4970 }, { "epoch": 1.83, "learning_rate": 8.307359307359309e-06, "loss": 0.0631, "step": 4980 }, { "epoch": 1.83, "learning_rate": 8.303751803751804e-06, "loss": 0.0659, "step": 4990 }, { "epoch": 1.84, "learning_rate": 8.300144300144301e-06, "loss": 0.0639, "step": 5000 }, { "epoch": 1.84, "eval_loss": 0.0875244140625, "eval_runtime": 638.4901, "eval_samples_per_second": 4.338, "eval_steps_per_second": 0.034, "eval_wer": 22.249190938511326, "step": 5000 }, { "epoch": 1.84, "learning_rate": 8.297258297258298e-06, "loss": 0.0649, "step": 5010 }, { "epoch": 1.85, "learning_rate": 8.293650793650794e-06, "loss": 0.0653, "step": 5020 }, { "epoch": 1.85, "learning_rate": 8.29004329004329e-06, "loss": 0.0672, "step": 5030 }, { "epoch": 1.85, "learning_rate": 8.286435786435787e-06, "loss": 0.0646, "step": 5040 }, { "epoch": 1.86, "learning_rate": 8.282828282828283e-06, "loss": 0.0639, "step": 5050 }, { "epoch": 1.86, "learning_rate": 8.27922077922078e-06, "loss": 0.0671, "step": 5060 }, { "epoch": 1.86, "learning_rate": 8.275613275613277e-06, "loss": 0.0695, "step": 5070 }, { "epoch": 1.87, "learning_rate": 8.272005772005772e-06, "loss": 0.0654, "step": 5080 }, { "epoch": 1.87, "learning_rate": 8.26839826839827e-06, "loss": 0.0654, "step": 5090 }, { "epoch": 1.88, "learning_rate": 8.264790764790766e-06, "loss": 0.0631, "step": 5100 }, { "epoch": 1.88, "eval_loss": 0.08734130859375, "eval_runtime": 259.0875, "eval_samples_per_second": 10.691, "eval_steps_per_second": 0.085, "eval_wer": 22.239077669902912, "step": 5100 }, { "epoch": 1.88, "learning_rate": 8.261183261183261e-06, "loss": 0.0656, "step": 5110 }, { "epoch": 1.88, "learning_rate": 8.257575757575758e-06, "loss": 0.0644, "step": 5120 }, { "epoch": 1.89, "learning_rate": 8.253968253968254e-06, "loss": 0.0634, "step": 5130 }, { "epoch": 1.89, "learning_rate": 8.25036075036075e-06, "loss": 0.0659, "step": 5140 }, { "epoch": 1.89, "learning_rate": 8.246753246753247e-06, "loss": 0.0656, "step": 5150 }, { "epoch": 1.9, "learning_rate": 8.243145743145744e-06, "loss": 0.0633, "step": 5160 }, { "epoch": 1.9, "learning_rate": 8.239538239538241e-06, "loss": 0.0665, "step": 5170 }, { "epoch": 1.9, "learning_rate": 8.235930735930736e-06, "loss": 0.0652, "step": 5180 }, { "epoch": 1.91, "learning_rate": 8.232323232323233e-06, "loss": 0.0626, "step": 5190 }, { "epoch": 1.91, "learning_rate": 8.22871572871573e-06, "loss": 0.0645, "step": 5200 }, { "epoch": 1.91, "eval_loss": 0.0869140625, "eval_runtime": 493.2946, "eval_samples_per_second": 5.615, "eval_steps_per_second": 0.045, "eval_wer": 22.31324163969795, "step": 5200 }, { "epoch": 1.92, "learning_rate": 8.225108225108225e-06, "loss": 0.0644, "step": 5210 }, { "epoch": 1.92, "learning_rate": 8.221500721500722e-06, "loss": 0.0649, "step": 5220 }, { "epoch": 1.92, "learning_rate": 8.217893217893217e-06, "loss": 0.0658, "step": 5230 }, { "epoch": 1.93, "learning_rate": 8.214285714285714e-06, "loss": 0.066, "step": 5240 }, { "epoch": 1.93, "learning_rate": 8.210678210678211e-06, "loss": 0.0634, "step": 5250 }, { "epoch": 1.93, "learning_rate": 8.207070707070708e-06, "loss": 0.065, "step": 5260 }, { "epoch": 1.94, "learning_rate": 8.203463203463205e-06, "loss": 0.0633, "step": 5270 }, { "epoch": 1.94, "learning_rate": 8.199855699855702e-06, "loss": 0.0628, "step": 5280 }, { "epoch": 1.94, "learning_rate": 8.196248196248197e-06, "loss": 0.065, "step": 5290 }, { "epoch": 1.95, "learning_rate": 8.192640692640694e-06, "loss": 0.0665, "step": 5300 }, { "epoch": 1.95, "eval_loss": 0.0872802734375, "eval_runtime": 673.3399, "eval_samples_per_second": 4.114, "eval_steps_per_second": 0.033, "eval_wer": 22.198624595469255, "step": 5300 }, { "epoch": 1.95, "learning_rate": 8.18903318903319e-06, "loss": 0.0622, "step": 5310 }, { "epoch": 1.96, "learning_rate": 8.185425685425686e-06, "loss": 0.0666, "step": 5320 }, { "epoch": 1.96, "learning_rate": 8.181818181818183e-06, "loss": 0.0666, "step": 5330 }, { "epoch": 1.96, "learning_rate": 8.178210678210678e-06, "loss": 0.0631, "step": 5340 }, { "epoch": 1.97, "learning_rate": 8.174603174603175e-06, "loss": 0.0632, "step": 5350 }, { "epoch": 1.97, "learning_rate": 8.170995670995672e-06, "loss": 0.0623, "step": 5360 }, { "epoch": 1.97, "learning_rate": 8.167388167388169e-06, "loss": 0.0657, "step": 5370 }, { "epoch": 1.98, "learning_rate": 8.163780663780666e-06, "loss": 0.0651, "step": 5380 }, { "epoch": 1.98, "learning_rate": 8.160173160173161e-06, "loss": 0.0655, "step": 5390 }, { "epoch": 1.99, "learning_rate": 8.156565656565658e-06, "loss": 0.0641, "step": 5400 }, { "epoch": 1.99, "eval_loss": 0.0867919921875, "eval_runtime": 648.7299, "eval_samples_per_second": 4.27, "eval_steps_per_second": 0.034, "eval_wer": 22.228964401294498, "step": 5400 }, { "epoch": 1.99, "learning_rate": 8.152958152958153e-06, "loss": 0.0657, "step": 5410 }, { "epoch": 1.99, "learning_rate": 8.14935064935065e-06, "loss": 0.065, "step": 5420 }, { "epoch": 2.0, "learning_rate": 8.145743145743147e-06, "loss": 0.0652, "step": 5430 }, { "epoch": 2.0, "learning_rate": 8.142135642135642e-06, "loss": 0.064, "step": 5440 }, { "epoch": 2.0, "learning_rate": 8.138528138528139e-06, "loss": 0.0557, "step": 5450 }, { "epoch": 2.01, "learning_rate": 8.134920634920636e-06, "loss": 0.0552, "step": 5460 }, { "epoch": 2.01, "learning_rate": 8.131313131313133e-06, "loss": 0.0555, "step": 5470 }, { "epoch": 2.01, "learning_rate": 8.12770562770563e-06, "loss": 0.0545, "step": 5480 }, { "epoch": 2.02, "learning_rate": 8.124098124098125e-06, "loss": 0.0565, "step": 5490 }, { "epoch": 2.02, "learning_rate": 8.120490620490622e-06, "loss": 0.0558, "step": 5500 }, { "epoch": 2.02, "eval_loss": 0.0870361328125, "eval_runtime": 693.0613, "eval_samples_per_second": 3.997, "eval_steps_per_second": 0.032, "eval_wer": 22.063781014023732, "step": 5500 }, { "epoch": 2.03, "learning_rate": 8.116883116883117e-06, "loss": 0.0541, "step": 5510 }, { "epoch": 2.03, "learning_rate": 8.113275613275614e-06, "loss": 0.0544, "step": 5520 }, { "epoch": 2.03, "learning_rate": 8.10966810966811e-06, "loss": 0.0532, "step": 5530 }, { "epoch": 2.04, "learning_rate": 8.106060606060606e-06, "loss": 0.054, "step": 5540 }, { "epoch": 2.04, "learning_rate": 8.102453102453103e-06, "loss": 0.0555, "step": 5550 }, { "epoch": 2.04, "learning_rate": 8.0988455988456e-06, "loss": 0.0543, "step": 5560 }, { "epoch": 2.05, "learning_rate": 8.095238095238097e-06, "loss": 0.0558, "step": 5570 }, { "epoch": 2.05, "learning_rate": 8.091630591630593e-06, "loss": 0.0523, "step": 5580 }, { "epoch": 2.06, "learning_rate": 8.088023088023089e-06, "loss": 0.0546, "step": 5590 }, { "epoch": 2.06, "learning_rate": 8.084415584415586e-06, "loss": 0.0556, "step": 5600 }, { "epoch": 2.06, "eval_loss": 0.08795166015625, "eval_runtime": 712.0699, "eval_samples_per_second": 3.89, "eval_steps_per_second": 0.031, "eval_wer": 21.96264832793959, "step": 5600 }, { "epoch": 2.06, "learning_rate": 8.08080808080808e-06, "loss": 0.0568, "step": 5610 }, { "epoch": 2.07, "learning_rate": 8.077200577200578e-06, "loss": 0.053, "step": 5620 }, { "epoch": 2.07, "learning_rate": 8.073593073593075e-06, "loss": 0.0541, "step": 5630 }, { "epoch": 2.07, "learning_rate": 8.069985569985571e-06, "loss": 0.0538, "step": 5640 }, { "epoch": 2.08, "learning_rate": 8.066378066378067e-06, "loss": 0.053, "step": 5650 }, { "epoch": 2.08, "learning_rate": 8.062770562770564e-06, "loss": 0.055, "step": 5660 }, { "epoch": 2.08, "learning_rate": 8.05916305916306e-06, "loss": 0.0539, "step": 5670 }, { "epoch": 2.09, "learning_rate": 8.055555555555557e-06, "loss": 0.056, "step": 5680 }, { "epoch": 2.09, "learning_rate": 8.051948051948052e-06, "loss": 0.0557, "step": 5690 }, { "epoch": 2.1, "learning_rate": 8.04834054834055e-06, "loss": 0.0562, "step": 5700 }, { "epoch": 2.1, "eval_loss": 0.08782958984375, "eval_runtime": 270.8387, "eval_samples_per_second": 10.227, "eval_steps_per_second": 0.081, "eval_wer": 22.10423408845739, "step": 5700 }, { "epoch": 2.1, "learning_rate": 8.044733044733045e-06, "loss": 0.0554, "step": 5710 }, { "epoch": 2.1, "learning_rate": 8.041125541125541e-06, "loss": 0.0524, "step": 5720 }, { "epoch": 2.11, "learning_rate": 8.037518037518038e-06, "loss": 0.0554, "step": 5730 }, { "epoch": 2.11, "learning_rate": 8.033910533910535e-06, "loss": 0.0561, "step": 5740 }, { "epoch": 2.11, "learning_rate": 8.03030303030303e-06, "loss": 0.0535, "step": 5750 }, { "epoch": 2.12, "learning_rate": 8.026695526695527e-06, "loss": 0.0548, "step": 5760 }, { "epoch": 2.12, "learning_rate": 8.023088023088024e-06, "loss": 0.0528, "step": 5770 }, { "epoch": 2.12, "learning_rate": 8.019480519480521e-06, "loss": 0.057, "step": 5780 }, { "epoch": 2.13, "learning_rate": 8.015873015873016e-06, "loss": 0.0579, "step": 5790 }, { "epoch": 2.13, "learning_rate": 8.012265512265513e-06, "loss": 0.0547, "step": 5800 }, { "epoch": 2.13, "eval_loss": 0.0888671875, "eval_runtime": 251.3262, "eval_samples_per_second": 11.022, "eval_steps_per_second": 0.088, "eval_wer": 22.05029665587918, "step": 5800 }, { "epoch": 2.14, "learning_rate": 8.008658008658008e-06, "loss": 0.0538, "step": 5810 }, { "epoch": 2.14, "learning_rate": 8.005050505050505e-06, "loss": 0.0565, "step": 5820 }, { "epoch": 2.14, "learning_rate": 8.001443001443002e-06, "loss": 0.0551, "step": 5830 }, { "epoch": 2.15, "learning_rate": 7.997835497835499e-06, "loss": 0.0537, "step": 5840 }, { "epoch": 2.15, "learning_rate": 7.994227994227994e-06, "loss": 0.0545, "step": 5850 }, { "epoch": 2.15, "learning_rate": 7.990620490620491e-06, "loss": 0.0547, "step": 5860 }, { "epoch": 2.16, "learning_rate": 7.987012987012988e-06, "loss": 0.0549, "step": 5870 }, { "epoch": 2.16, "learning_rate": 7.983405483405483e-06, "loss": 0.0533, "step": 5880 }, { "epoch": 2.17, "learning_rate": 7.97979797979798e-06, "loss": 0.0535, "step": 5890 }, { "epoch": 2.17, "learning_rate": 7.976190476190477e-06, "loss": 0.0553, "step": 5900 }, { "epoch": 2.17, "eval_loss": 0.08807373046875, "eval_runtime": 528.7421, "eval_samples_per_second": 5.239, "eval_steps_per_second": 0.042, "eval_wer": 22.218851132686083, "step": 5900 }, { "epoch": 2.17, "learning_rate": 7.972582972582972e-06, "loss": 0.0531, "step": 5910 }, { "epoch": 2.18, "learning_rate": 7.96897546897547e-06, "loss": 0.0553, "step": 5920 }, { "epoch": 2.18, "learning_rate": 7.965367965367966e-06, "loss": 0.0548, "step": 5930 }, { "epoch": 2.18, "learning_rate": 7.961760461760463e-06, "loss": 0.0533, "step": 5940 }, { "epoch": 2.19, "learning_rate": 7.95815295815296e-06, "loss": 0.0531, "step": 5950 }, { "epoch": 2.19, "learning_rate": 7.954545454545455e-06, "loss": 0.0536, "step": 5960 }, { "epoch": 2.19, "learning_rate": 7.950937950937952e-06, "loss": 0.0564, "step": 5970 }, { "epoch": 2.2, "learning_rate": 7.947330447330447e-06, "loss": 0.0542, "step": 5980 }, { "epoch": 2.2, "learning_rate": 7.943722943722944e-06, "loss": 0.0551, "step": 5990 }, { "epoch": 2.21, "learning_rate": 7.940115440115441e-06, "loss": 0.0547, "step": 6000 }, { "epoch": 2.21, "eval_loss": 0.08795166015625, "eval_runtime": 556.2644, "eval_samples_per_second": 4.98, "eval_steps_per_second": 0.04, "eval_wer": 21.98287486515642, "step": 6000 }, { "epoch": 2.21, "learning_rate": 7.936868686868688e-06, "loss": 0.0552, "step": 6010 }, { "epoch": 2.21, "learning_rate": 7.933621933621934e-06, "loss": 0.0556, "step": 6020 }, { "epoch": 2.22, "learning_rate": 7.930014430014431e-06, "loss": 0.0536, "step": 6030 }, { "epoch": 2.22, "learning_rate": 7.926406926406926e-06, "loss": 0.056, "step": 6040 }, { "epoch": 2.22, "learning_rate": 7.922799422799423e-06, "loss": 0.0556, "step": 6050 }, { "epoch": 2.23, "learning_rate": 7.91919191919192e-06, "loss": 0.0551, "step": 6060 }, { "epoch": 2.23, "learning_rate": 7.915584415584417e-06, "loss": 0.0552, "step": 6070 }, { "epoch": 2.24, "learning_rate": 7.911976911976912e-06, "loss": 0.0558, "step": 6080 }, { "epoch": 2.24, "learning_rate": 7.90836940836941e-06, "loss": 0.0531, "step": 6090 }, { "epoch": 2.24, "learning_rate": 7.904761904761904e-06, "loss": 0.0544, "step": 6100 }, { "epoch": 2.24, "eval_loss": 0.08709716796875, "eval_runtime": 263.4486, "eval_samples_per_second": 10.514, "eval_steps_per_second": 0.084, "eval_wer": 22.110976267529665, "step": 6100 }, { "epoch": 2.25, "learning_rate": 7.901154401154401e-06, "loss": 0.0523, "step": 6110 }, { "epoch": 2.25, "learning_rate": 7.897546897546898e-06, "loss": 0.0552, "step": 6120 }, { "epoch": 2.25, "learning_rate": 7.893939393939395e-06, "loss": 0.0545, "step": 6130 }, { "epoch": 2.26, "learning_rate": 7.890331890331892e-06, "loss": 0.0554, "step": 6140 }, { "epoch": 2.26, "learning_rate": 7.886724386724387e-06, "loss": 0.0555, "step": 6150 }, { "epoch": 2.26, "learning_rate": 7.883116883116884e-06, "loss": 0.0544, "step": 6160 }, { "epoch": 2.27, "learning_rate": 7.879509379509381e-06, "loss": 0.0523, "step": 6170 }, { "epoch": 2.27, "learning_rate": 7.875901875901876e-06, "loss": 0.0547, "step": 6180 }, { "epoch": 2.28, "learning_rate": 7.872294372294373e-06, "loss": 0.0536, "step": 6190 }, { "epoch": 2.28, "learning_rate": 7.868686868686868e-06, "loss": 0.0573, "step": 6200 }, { "epoch": 2.28, "eval_loss": 0.08660888671875, "eval_runtime": 367.0765, "eval_samples_per_second": 7.546, "eval_steps_per_second": 0.06, "eval_wer": 21.635652642934197, "step": 6200 }, { "epoch": 2.28, "learning_rate": 7.865079365079365e-06, "loss": 0.0541, "step": 6210 }, { "epoch": 2.29, "learning_rate": 7.861471861471862e-06, "loss": 0.053, "step": 6220 }, { "epoch": 2.29, "learning_rate": 7.857864357864359e-06, "loss": 0.0558, "step": 6230 }, { "epoch": 2.29, "learning_rate": 7.854256854256856e-06, "loss": 0.0543, "step": 6240 }, { "epoch": 2.3, "learning_rate": 7.850649350649351e-06, "loss": 0.0527, "step": 6250 }, { "epoch": 2.3, "learning_rate": 7.847041847041848e-06, "loss": 0.0535, "step": 6260 }, { "epoch": 2.31, "learning_rate": 7.843434343434345e-06, "loss": 0.0537, "step": 6270 }, { "epoch": 2.31, "learning_rate": 7.83982683982684e-06, "loss": 0.0563, "step": 6280 }, { "epoch": 2.31, "learning_rate": 7.836219336219337e-06, "loss": 0.0555, "step": 6290 }, { "epoch": 2.32, "learning_rate": 7.832611832611832e-06, "loss": 0.0562, "step": 6300 }, { "epoch": 2.32, "eval_loss": 0.08721923828125, "eval_runtime": 637.4835, "eval_samples_per_second": 4.345, "eval_steps_per_second": 0.035, "eval_wer": 21.97950377562028, "step": 6300 }, { "epoch": 2.32, "learning_rate": 7.829004329004329e-06, "loss": 0.0571, "step": 6310 }, { "epoch": 2.32, "learning_rate": 7.825396825396826e-06, "loss": 0.0566, "step": 6320 }, { "epoch": 2.33, "learning_rate": 7.821789321789323e-06, "loss": 0.0547, "step": 6330 }, { "epoch": 2.33, "learning_rate": 7.81818181818182e-06, "loss": 0.0557, "step": 6340 }, { "epoch": 2.33, "learning_rate": 7.814574314574315e-06, "loss": 0.0564, "step": 6350 }, { "epoch": 2.34, "learning_rate": 7.810966810966812e-06, "loss": 0.0552, "step": 6360 }, { "epoch": 2.34, "learning_rate": 7.807359307359309e-06, "loss": 0.0539, "step": 6370 }, { "epoch": 2.35, "learning_rate": 7.803751803751804e-06, "loss": 0.0536, "step": 6380 }, { "epoch": 2.35, "learning_rate": 7.8001443001443e-06, "loss": 0.0543, "step": 6390 }, { "epoch": 2.35, "learning_rate": 7.796536796536796e-06, "loss": 0.0551, "step": 6400 }, { "epoch": 2.35, "eval_loss": 0.0872802734375, "eval_runtime": 686.505, "eval_samples_per_second": 4.035, "eval_steps_per_second": 0.032, "eval_wer": 21.976132686084142, "step": 6400 }, { "epoch": 2.36, "learning_rate": 7.792929292929293e-06, "loss": 0.0549, "step": 6410 }, { "epoch": 2.36, "learning_rate": 7.78932178932179e-06, "loss": 0.055, "step": 6420 }, { "epoch": 2.36, "learning_rate": 7.785714285714287e-06, "loss": 0.0545, "step": 6430 }, { "epoch": 2.37, "learning_rate": 7.782106782106784e-06, "loss": 0.0532, "step": 6440 }, { "epoch": 2.37, "learning_rate": 7.77849927849928e-06, "loss": 0.0558, "step": 6450 }, { "epoch": 2.38, "learning_rate": 7.774891774891776e-06, "loss": 0.0538, "step": 6460 }, { "epoch": 2.38, "learning_rate": 7.771284271284273e-06, "loss": 0.0534, "step": 6470 }, { "epoch": 2.38, "learning_rate": 7.767676767676768e-06, "loss": 0.0516, "step": 6480 }, { "epoch": 2.39, "learning_rate": 7.764069264069265e-06, "loss": 0.0537, "step": 6490 }, { "epoch": 2.39, "learning_rate": 7.760461760461762e-06, "loss": 0.0556, "step": 6500 }, { "epoch": 2.39, "eval_loss": 0.08697509765625, "eval_runtime": 549.3253, "eval_samples_per_second": 5.043, "eval_steps_per_second": 0.04, "eval_wer": 22.01321467098166, "step": 6500 }, { "epoch": 2.39, "learning_rate": 7.756854256854257e-06, "loss": 0.0535, "step": 6510 }, { "epoch": 2.4, "learning_rate": 7.753246753246754e-06, "loss": 0.0539, "step": 6520 }, { "epoch": 2.4, "learning_rate": 7.74963924963925e-06, "loss": 0.0531, "step": 6530 }, { "epoch": 2.4, "learning_rate": 7.746031746031747e-06, "loss": 0.0546, "step": 6540 }, { "epoch": 2.41, "learning_rate": 7.742424242424244e-06, "loss": 0.0552, "step": 6550 }, { "epoch": 2.41, "learning_rate": 7.73881673881674e-06, "loss": 0.0565, "step": 6560 }, { "epoch": 2.42, "learning_rate": 7.735209235209236e-06, "loss": 0.0551, "step": 6570 }, { "epoch": 2.42, "learning_rate": 7.731601731601732e-06, "loss": 0.0551, "step": 6580 }, { "epoch": 2.42, "learning_rate": 7.727994227994229e-06, "loss": 0.0534, "step": 6590 }, { "epoch": 2.43, "learning_rate": 7.724386724386725e-06, "loss": 0.0558, "step": 6600 }, { "epoch": 2.43, "eval_loss": 0.0870361328125, "eval_runtime": 256.8196, "eval_samples_per_second": 10.786, "eval_steps_per_second": 0.086, "eval_wer": 21.672734627831716, "step": 6600 }, { "epoch": 2.43, "learning_rate": 7.72077922077922e-06, "loss": 0.0564, "step": 6610 }, { "epoch": 2.43, "learning_rate": 7.717171717171717e-06, "loss": 0.0572, "step": 6620 }, { "epoch": 2.44, "learning_rate": 7.713564213564214e-06, "loss": 0.0569, "step": 6630 }, { "epoch": 2.44, "learning_rate": 7.709956709956711e-06, "loss": 0.0538, "step": 6640 }, { "epoch": 2.44, "learning_rate": 7.706349206349208e-06, "loss": 0.0541, "step": 6650 }, { "epoch": 2.45, "learning_rate": 7.702741702741703e-06, "loss": 0.0542, "step": 6660 }, { "epoch": 2.45, "learning_rate": 7.6991341991342e-06, "loss": 0.0532, "step": 6670 }, { "epoch": 2.46, "learning_rate": 7.695526695526695e-06, "loss": 0.0552, "step": 6680 }, { "epoch": 2.46, "learning_rate": 7.691919191919192e-06, "loss": 0.0553, "step": 6690 }, { "epoch": 2.46, "learning_rate": 7.68831168831169e-06, "loss": 0.0565, "step": 6700 }, { "epoch": 2.46, "eval_loss": 0.08868408203125, "eval_runtime": 462.3577, "eval_samples_per_second": 5.991, "eval_steps_per_second": 0.048, "eval_wer": 22.646979503775622, "step": 6700 }, { "epoch": 2.47, "learning_rate": 7.684704184704186e-06, "loss": 0.0555, "step": 6710 }, { "epoch": 2.47, "learning_rate": 7.681096681096681e-06, "loss": 0.0566, "step": 6720 }, { "epoch": 2.47, "learning_rate": 7.677489177489178e-06, "loss": 0.0541, "step": 6730 }, { "epoch": 2.48, "learning_rate": 7.673881673881675e-06, "loss": 0.055, "step": 6740 }, { "epoch": 2.48, "learning_rate": 7.670274170274172e-06, "loss": 0.055, "step": 6750 }, { "epoch": 2.49, "learning_rate": 7.666666666666667e-06, "loss": 0.0544, "step": 6760 }, { "epoch": 2.49, "learning_rate": 7.663059163059164e-06, "loss": 0.0553, "step": 6770 }, { "epoch": 2.49, "learning_rate": 7.65945165945166e-06, "loss": 0.0538, "step": 6780 }, { "epoch": 2.5, "learning_rate": 7.655844155844156e-06, "loss": 0.0567, "step": 6790 }, { "epoch": 2.5, "learning_rate": 7.652236652236653e-06, "loss": 0.0549, "step": 6800 }, { "epoch": 2.5, "eval_loss": 0.08734130859375, "eval_runtime": 650.8788, "eval_samples_per_second": 4.256, "eval_steps_per_second": 0.034, "eval_wer": 21.794093851132686, "step": 6800 }, { "epoch": 2.5, "learning_rate": 7.64862914862915e-06, "loss": 0.0559, "step": 6810 }, { "epoch": 2.51, "learning_rate": 7.645021645021645e-06, "loss": 0.0557, "step": 6820 }, { "epoch": 2.51, "learning_rate": 7.641414141414142e-06, "loss": 0.0532, "step": 6830 }, { "epoch": 2.51, "learning_rate": 7.637806637806639e-06, "loss": 0.0537, "step": 6840 }, { "epoch": 2.52, "learning_rate": 7.634199134199136e-06, "loss": 0.0572, "step": 6850 }, { "epoch": 2.52, "learning_rate": 7.630591630591631e-06, "loss": 0.0559, "step": 6860 }, { "epoch": 2.53, "learning_rate": 7.626984126984127e-06, "loss": 0.0558, "step": 6870 }, { "epoch": 2.53, "learning_rate": 7.623376623376624e-06, "loss": 0.0549, "step": 6880 }, { "epoch": 2.53, "learning_rate": 7.61976911976912e-06, "loss": 0.0533, "step": 6890 }, { "epoch": 2.54, "learning_rate": 7.616161616161617e-06, "loss": 0.0559, "step": 6900 }, { "epoch": 2.54, "eval_loss": 0.08636474609375, "eval_runtime": 684.4605, "eval_samples_per_second": 4.047, "eval_steps_per_second": 0.032, "eval_wer": 22.185140237324703, "step": 6900 }, { "epoch": 2.54, "learning_rate": 7.612554112554114e-06, "loss": 0.0555, "step": 6910 }, { "epoch": 2.54, "learning_rate": 7.608946608946609e-06, "loss": 0.0552, "step": 6920 }, { "epoch": 2.55, "learning_rate": 7.605339105339106e-06, "loss": 0.0545, "step": 6930 }, { "epoch": 2.55, "learning_rate": 7.601731601731602e-06, "loss": 0.0522, "step": 6940 }, { "epoch": 2.56, "learning_rate": 7.598124098124099e-06, "loss": 0.0545, "step": 6950 }, { "epoch": 2.56, "learning_rate": 7.594516594516596e-06, "loss": 0.0562, "step": 6960 }, { "epoch": 2.56, "learning_rate": 7.590909090909091e-06, "loss": 0.0544, "step": 6970 }, { "epoch": 2.57, "learning_rate": 7.587301587301588e-06, "loss": 0.0545, "step": 6980 }, { "epoch": 2.57, "learning_rate": 7.583694083694084e-06, "loss": 0.0544, "step": 6990 }, { "epoch": 2.57, "learning_rate": 7.580086580086581e-06, "loss": 0.0552, "step": 7000 }, { "epoch": 2.57, "eval_loss": 0.0867919921875, "eval_runtime": 359.998, "eval_samples_per_second": 7.694, "eval_steps_per_second": 0.061, "eval_wer": 21.85477346278317, "step": 7000 }, { "epoch": 2.58, "learning_rate": 7.576479076479078e-06, "loss": 0.0548, "step": 7010 }, { "epoch": 2.58, "learning_rate": 7.573593073593074e-06, "loss": 0.0534, "step": 7020 }, { "epoch": 2.58, "learning_rate": 7.56998556998557e-06, "loss": 0.055, "step": 7030 }, { "epoch": 2.59, "learning_rate": 7.566378066378067e-06, "loss": 0.0547, "step": 7040 }, { "epoch": 2.59, "learning_rate": 7.562770562770564e-06, "loss": 0.0539, "step": 7050 }, { "epoch": 2.6, "learning_rate": 7.559163059163059e-06, "loss": 0.0554, "step": 7060 }, { "epoch": 2.6, "learning_rate": 7.555555555555556e-06, "loss": 0.0587, "step": 7070 }, { "epoch": 2.6, "learning_rate": 7.551948051948052e-06, "loss": 0.0554, "step": 7080 }, { "epoch": 2.61, "learning_rate": 7.548340548340549e-06, "loss": 0.0551, "step": 7090 }, { "epoch": 2.61, "learning_rate": 7.544733044733046e-06, "loss": 0.0546, "step": 7100 }, { "epoch": 2.61, "eval_loss": 0.08642578125, "eval_runtime": 281.6212, "eval_samples_per_second": 9.836, "eval_steps_per_second": 0.078, "eval_wer": 21.794093851132686, "step": 7100 }, { "epoch": 2.61, "learning_rate": 7.541125541125541e-06, "loss": 0.0537, "step": 7110 }, { "epoch": 2.62, "learning_rate": 7.537518037518038e-06, "loss": 0.0562, "step": 7120 }, { "epoch": 2.62, "learning_rate": 7.533910533910534e-06, "loss": 0.0545, "step": 7130 }, { "epoch": 2.62, "learning_rate": 7.530303030303031e-06, "loss": 0.0555, "step": 7140 }, { "epoch": 2.63, "learning_rate": 7.526695526695528e-06, "loss": 0.0557, "step": 7150 }, { "epoch": 2.63, "learning_rate": 7.523088023088023e-06, "loss": 0.0525, "step": 7160 }, { "epoch": 2.64, "learning_rate": 7.51948051948052e-06, "loss": 0.0566, "step": 7170 }, { "epoch": 2.64, "learning_rate": 7.515873015873016e-06, "loss": 0.0545, "step": 7180 }, { "epoch": 2.64, "learning_rate": 7.512265512265513e-06, "loss": 0.0562, "step": 7190 }, { "epoch": 2.65, "learning_rate": 7.50865800865801e-06, "loss": 0.0549, "step": 7200 }, { "epoch": 2.65, "eval_loss": 0.08612060546875, "eval_runtime": 784.1899, "eval_samples_per_second": 3.532, "eval_steps_per_second": 0.028, "eval_wer": 22.357065803667744, "step": 7200 }, { "epoch": 2.65, "learning_rate": 7.505050505050505e-06, "loss": 0.0548, "step": 7210 }, { "epoch": 2.65, "learning_rate": 7.501443001443002e-06, "loss": 0.056, "step": 7220 }, { "epoch": 2.66, "learning_rate": 7.497835497835498e-06, "loss": 0.0544, "step": 7230 }, { "epoch": 2.66, "learning_rate": 7.494227994227995e-06, "loss": 0.054, "step": 7240 }, { "epoch": 2.67, "learning_rate": 7.490620490620492e-06, "loss": 0.0558, "step": 7250 }, { "epoch": 2.67, "learning_rate": 7.487012987012988e-06, "loss": 0.0549, "step": 7260 }, { "epoch": 2.67, "learning_rate": 7.483405483405484e-06, "loss": 0.0555, "step": 7270 }, { "epoch": 2.68, "learning_rate": 7.47979797979798e-06, "loss": 0.0559, "step": 7280 }, { "epoch": 2.68, "learning_rate": 7.476190476190477e-06, "loss": 0.0574, "step": 7290 }, { "epoch": 2.68, "learning_rate": 7.472582972582974e-06, "loss": 0.0567, "step": 7300 }, { "epoch": 2.68, "eval_loss": 0.0860595703125, "eval_runtime": 523.0183, "eval_samples_per_second": 5.296, "eval_steps_per_second": 0.042, "eval_wer": 21.709816612729234, "step": 7300 }, { "epoch": 2.69, "learning_rate": 7.46897546897547e-06, "loss": 0.0541, "step": 7310 }, { "epoch": 2.69, "learning_rate": 7.465367965367966e-06, "loss": 0.0547, "step": 7320 }, { "epoch": 2.69, "learning_rate": 7.461760461760462e-06, "loss": 0.0558, "step": 7330 }, { "epoch": 2.7, "learning_rate": 7.458152958152959e-06, "loss": 0.0536, "step": 7340 }, { "epoch": 2.7, "learning_rate": 7.454545454545456e-06, "loss": 0.0539, "step": 7350 }, { "epoch": 2.71, "learning_rate": 7.450937950937952e-06, "loss": 0.0583, "step": 7360 }, { "epoch": 2.71, "learning_rate": 7.447330447330448e-06, "loss": 0.0541, "step": 7370 }, { "epoch": 2.71, "learning_rate": 7.443722943722944e-06, "loss": 0.0545, "step": 7380 }, { "epoch": 2.72, "learning_rate": 7.440115440115441e-06, "loss": 0.0528, "step": 7390 }, { "epoch": 2.72, "learning_rate": 7.4365079365079376e-06, "loss": 0.0548, "step": 7400 }, { "epoch": 2.72, "eval_loss": 0.0858154296875, "eval_runtime": 282.1778, "eval_samples_per_second": 9.817, "eval_steps_per_second": 0.078, "eval_wer": 21.83791801510248, "step": 7400 }, { "epoch": 2.72, "learning_rate": 7.432900432900434e-06, "loss": 0.0563, "step": 7410 }, { "epoch": 2.73, "learning_rate": 7.42929292929293e-06, "loss": 0.0534, "step": 7420 }, { "epoch": 2.73, "learning_rate": 7.425685425685426e-06, "loss": 0.0571, "step": 7430 }, { "epoch": 2.74, "learning_rate": 7.422077922077923e-06, "loss": 0.0548, "step": 7440 }, { "epoch": 2.74, "learning_rate": 7.4184704184704195e-06, "loss": 0.0545, "step": 7450 }, { "epoch": 2.74, "learning_rate": 7.4148629148629155e-06, "loss": 0.0542, "step": 7460 }, { "epoch": 2.75, "learning_rate": 7.411255411255412e-06, "loss": 0.0547, "step": 7470 }, { "epoch": 2.75, "learning_rate": 7.407647907647908e-06, "loss": 0.0567, "step": 7480 }, { "epoch": 2.75, "learning_rate": 7.4040404040404045e-06, "loss": 0.0563, "step": 7490 }, { "epoch": 2.76, "learning_rate": 7.400432900432901e-06, "loss": 0.055, "step": 7500 }, { "epoch": 2.76, "eval_loss": 0.08563232421875, "eval_runtime": 382.1657, "eval_samples_per_second": 7.248, "eval_steps_per_second": 0.058, "eval_wer": 21.986245954692556, "step": 7500 }, { "epoch": 2.76, "learning_rate": 7.3968253968253975e-06, "loss": 0.0533, "step": 7510 }, { "epoch": 2.76, "learning_rate": 7.3932178932178935e-06, "loss": 0.0546, "step": 7520 }, { "epoch": 2.77, "learning_rate": 7.3896103896103896e-06, "loss": 0.0551, "step": 7530 }, { "epoch": 2.77, "learning_rate": 7.3860028860028865e-06, "loss": 0.0537, "step": 7540 }, { "epoch": 2.78, "learning_rate": 7.382395382395383e-06, "loss": 0.0547, "step": 7550 }, { "epoch": 2.78, "learning_rate": 7.378787878787879e-06, "loss": 0.0556, "step": 7560 }, { "epoch": 2.78, "learning_rate": 7.375180375180376e-06, "loss": 0.0547, "step": 7570 }, { "epoch": 2.79, "learning_rate": 7.3715728715728715e-06, "loss": 0.0539, "step": 7580 }, { "epoch": 2.79, "learning_rate": 7.367965367965368e-06, "loss": 0.0554, "step": 7590 }, { "epoch": 2.79, "learning_rate": 7.364357864357865e-06, "loss": 0.0541, "step": 7600 }, { "epoch": 2.79, "eval_loss": 0.08575439453125, "eval_runtime": 628.8874, "eval_samples_per_second": 4.405, "eval_steps_per_second": 0.035, "eval_wer": 21.810949298813377, "step": 7600 }, { "epoch": 2.8, "learning_rate": 7.360750360750361e-06, "loss": 0.0561, "step": 7610 }, { "epoch": 2.8, "learning_rate": 7.357142857142858e-06, "loss": 0.0545, "step": 7620 }, { "epoch": 2.81, "learning_rate": 7.353535353535353e-06, "loss": 0.0544, "step": 7630 }, { "epoch": 2.81, "learning_rate": 7.34992784992785e-06, "loss": 0.0543, "step": 7640 }, { "epoch": 2.81, "learning_rate": 7.346320346320347e-06, "loss": 0.0549, "step": 7650 }, { "epoch": 2.82, "learning_rate": 7.342712842712843e-06, "loss": 0.0536, "step": 7660 }, { "epoch": 2.82, "learning_rate": 7.33910533910534e-06, "loss": 0.056, "step": 7670 }, { "epoch": 2.82, "learning_rate": 7.335497835497835e-06, "loss": 0.0545, "step": 7680 }, { "epoch": 2.83, "learning_rate": 7.331890331890332e-06, "loss": 0.054, "step": 7690 }, { "epoch": 2.83, "learning_rate": 7.328282828282829e-06, "loss": 0.0554, "step": 7700 }, { "epoch": 2.83, "eval_loss": 0.08563232421875, "eval_runtime": 906.4345, "eval_samples_per_second": 3.056, "eval_steps_per_second": 0.024, "eval_wer": 21.85477346278317, "step": 7700 }, { "epoch": 2.83, "learning_rate": 7.324675324675325e-06, "loss": 0.0578, "step": 7710 }, { "epoch": 2.84, "learning_rate": 7.321067821067822e-06, "loss": 0.0541, "step": 7720 }, { "epoch": 2.84, "learning_rate": 7.317460317460317e-06, "loss": 0.0557, "step": 7730 }, { "epoch": 2.85, "learning_rate": 7.313852813852814e-06, "loss": 0.0536, "step": 7740 }, { "epoch": 2.85, "learning_rate": 7.310245310245311e-06, "loss": 0.0538, "step": 7750 }, { "epoch": 2.85, "learning_rate": 7.306637806637807e-06, "loss": 0.0533, "step": 7760 }, { "epoch": 2.86, "learning_rate": 7.303030303030304e-06, "loss": 0.0549, "step": 7770 }, { "epoch": 2.86, "learning_rate": 7.299422799422799e-06, "loss": 0.0543, "step": 7780 }, { "epoch": 2.86, "learning_rate": 7.295815295815296e-06, "loss": 0.0561, "step": 7790 }, { "epoch": 2.87, "learning_rate": 7.292207792207793e-06, "loss": 0.0534, "step": 7800 }, { "epoch": 2.87, "eval_loss": 0.08599853515625, "eval_runtime": 594.8444, "eval_samples_per_second": 4.657, "eval_steps_per_second": 0.037, "eval_wer": 21.39630528586839, "step": 7800 }, { "epoch": 2.87, "learning_rate": 7.288600288600289e-06, "loss": 0.0573, "step": 7810 }, { "epoch": 2.88, "learning_rate": 7.284992784992786e-06, "loss": 0.0568, "step": 7820 }, { "epoch": 2.88, "learning_rate": 7.281385281385281e-06, "loss": 0.0554, "step": 7830 }, { "epoch": 2.88, "learning_rate": 7.277777777777778e-06, "loss": 0.0552, "step": 7840 }, { "epoch": 2.89, "learning_rate": 7.274170274170275e-06, "loss": 0.0531, "step": 7850 }, { "epoch": 2.89, "learning_rate": 7.270562770562771e-06, "loss": 0.0539, "step": 7860 }, { "epoch": 2.89, "learning_rate": 7.266955266955268e-06, "loss": 0.0555, "step": 7870 }, { "epoch": 2.9, "learning_rate": 7.263347763347765e-06, "loss": 0.0538, "step": 7880 }, { "epoch": 2.9, "learning_rate": 7.25974025974026e-06, "loss": 0.0552, "step": 7890 }, { "epoch": 2.9, "learning_rate": 7.256132756132757e-06, "loss": 0.0544, "step": 7900 }, { "epoch": 2.9, "eval_loss": 0.08575439453125, "eval_runtime": 276.18, "eval_samples_per_second": 10.03, "eval_steps_per_second": 0.08, "eval_wer": 21.419902912621357, "step": 7900 }, { "epoch": 2.91, "learning_rate": 7.252525252525253e-06, "loss": 0.0526, "step": 7910 }, { "epoch": 2.91, "learning_rate": 7.24891774891775e-06, "loss": 0.0554, "step": 7920 }, { "epoch": 2.92, "learning_rate": 7.245310245310247e-06, "loss": 0.0576, "step": 7930 }, { "epoch": 2.92, "learning_rate": 7.241702741702742e-06, "loss": 0.0552, "step": 7940 }, { "epoch": 2.92, "learning_rate": 7.238095238095239e-06, "loss": 0.0557, "step": 7950 }, { "epoch": 2.93, "learning_rate": 7.234487734487735e-06, "loss": 0.0517, "step": 7960 }, { "epoch": 2.93, "learning_rate": 7.230880230880232e-06, "loss": 0.0537, "step": 7970 }, { "epoch": 2.93, "learning_rate": 7.227272727272729e-06, "loss": 0.0576, "step": 7980 }, { "epoch": 2.94, "learning_rate": 7.223665223665224e-06, "loss": 0.0547, "step": 7990 }, { "epoch": 2.94, "learning_rate": 7.220057720057721e-06, "loss": 0.0556, "step": 8000 }, { "epoch": 2.94, "eval_loss": 0.085205078125, "eval_runtime": 353.3226, "eval_samples_per_second": 7.84, "eval_steps_per_second": 0.062, "eval_wer": 21.64576591154261, "step": 8000 }, { "epoch": 2.94, "learning_rate": 7.216450216450217e-06, "loss": 0.0547, "step": 8010 }, { "epoch": 2.95, "learning_rate": 7.213564213564213e-06, "loss": 0.0531, "step": 8020 }, { "epoch": 2.95, "learning_rate": 7.20995670995671e-06, "loss": 0.0545, "step": 8030 }, { "epoch": 2.96, "learning_rate": 7.206349206349207e-06, "loss": 0.0566, "step": 8040 }, { "epoch": 2.96, "learning_rate": 7.202741702741703e-06, "loss": 0.0556, "step": 8050 }, { "epoch": 2.96, "learning_rate": 7.1991341991342e-06, "loss": 0.0538, "step": 8060 }, { "epoch": 2.97, "learning_rate": 7.195526695526695e-06, "loss": 0.0523, "step": 8070 }, { "epoch": 2.97, "learning_rate": 7.191919191919192e-06, "loss": 0.0556, "step": 8080 }, { "epoch": 2.97, "learning_rate": 7.188311688311689e-06, "loss": 0.0569, "step": 8090 }, { "epoch": 2.98, "learning_rate": 7.184704184704185e-06, "loss": 0.0544, "step": 8100 }, { "epoch": 2.98, "eval_loss": 0.08526611328125, "eval_runtime": 716.9844, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.031, "eval_wer": 21.3524811218986, "step": 8100 } ], "max_steps": 28000, "num_train_epochs": 11, "total_flos": 2.1159834348111245e+21, "trial_name": null, "trial_params": null }