{ "best_metric": 2.3263261318206787, "best_model_checkpoint": "ai-light-dance_drums_ft_pretrain_wav2vec2-base/checkpoint-1168", "epoch": 199.90140845070422, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "eval_loss": 129.7303924560547, "eval_runtime": 7.5345, "eval_samples_per_second": 8.362, "eval_steps_per_second": 1.062, "eval_wer": 1.082609997006884, "step": 8 }, { "epoch": 1.23, "learning_rate": 0.00014, "loss": 58.4765, "step": 10 }, { "epoch": 1.9, "eval_loss": 92.10203552246094, "eval_runtime": 7.564, "eval_samples_per_second": 8.329, "eval_steps_per_second": 1.058, "eval_wer": 1.3475007482789585, "step": 16 }, { "epoch": 2.45, "learning_rate": 0.00032, "loss": 37.6409, "step": 20 }, { "epoch": 2.9, "eval_loss": 23.880123138427734, "eval_runtime": 7.5165, "eval_samples_per_second": 8.382, "eval_steps_per_second": 1.064, "eval_wer": 1.0, "step": 24 }, { "epoch": 3.68, "learning_rate": 0.0003984810126582279, "loss": 6.3535, "step": 30 }, { "epoch": 3.9, "eval_loss": 13.232953071594238, "eval_runtime": 7.5025, "eval_samples_per_second": 8.397, "eval_steps_per_second": 1.066, "eval_wer": 1.0, "step": 32 }, { "epoch": 4.9, "learning_rate": 0.0003959493670886076, "loss": 5.0319, "step": 40 }, { "epoch": 4.9, "eval_loss": 8.361285209655762, "eval_runtime": 7.5177, "eval_samples_per_second": 8.38, "eval_steps_per_second": 1.064, "eval_wer": 1.0, "step": 40 }, { "epoch": 5.9, "eval_loss": 5.518415927886963, "eval_runtime": 7.547, "eval_samples_per_second": 8.348, "eval_steps_per_second": 1.06, "eval_wer": 1.0, "step": 48 }, { "epoch": 6.23, "learning_rate": 0.00039341772151898737, "loss": 4.7733, "step": 50 }, { "epoch": 6.9, "eval_loss": 4.171965599060059, "eval_runtime": 7.494, "eval_samples_per_second": 8.407, "eval_steps_per_second": 1.068, "eval_wer": 1.0, "step": 56 }, { "epoch": 7.45, "learning_rate": 0.0003908860759493671, "loss": 3.8911, "step": 60 }, { "epoch": 7.9, "eval_loss": 3.9397051334381104, "eval_runtime": 7.5681, "eval_samples_per_second": 8.324, "eval_steps_per_second": 1.057, "eval_wer": 0.9994013768332835, "step": 64 }, { "epoch": 8.68, "learning_rate": 0.00038835443037974686, "loss": 3.6, "step": 70 }, { "epoch": 8.9, "eval_loss": 4.074159622192383, "eval_runtime": 7.4884, "eval_samples_per_second": 8.413, "eval_steps_per_second": 1.068, "eval_wer": 0.9994013768332835, "step": 72 }, { "epoch": 9.9, "learning_rate": 0.00038582278481012663, "loss": 3.1586, "step": 80 }, { "epoch": 9.9, "eval_loss": 3.8395957946777344, "eval_runtime": 7.5346, "eval_samples_per_second": 8.361, "eval_steps_per_second": 1.062, "eval_wer": 0.9997006884166417, "step": 80 }, { "epoch": 10.9, "eval_loss": 3.8820667266845703, "eval_runtime": 7.5684, "eval_samples_per_second": 8.324, "eval_steps_per_second": 1.057, "eval_wer": 0.9994013768332835, "step": 88 }, { "epoch": 11.23, "learning_rate": 0.00038329113924050635, "loss": 3.3812, "step": 90 }, { "epoch": 11.9, "eval_loss": 3.9755468368530273, "eval_runtime": 7.5139, "eval_samples_per_second": 8.384, "eval_steps_per_second": 1.065, "eval_wer": 0.9979048189164921, "step": 96 }, { "epoch": 12.45, "learning_rate": 0.00038075949367088606, "loss": 3.0088, "step": 100 }, { "epoch": 12.9, "eval_loss": 4.529850006103516, "eval_runtime": 7.5521, "eval_samples_per_second": 8.342, "eval_steps_per_second": 1.059, "eval_wer": 0.9970068841664173, "step": 104 }, { "epoch": 13.68, "learning_rate": 0.00037822784810126583, "loss": 2.7633, "step": 110 }, { "epoch": 13.9, "eval_loss": 4.139248371124268, "eval_runtime": 7.5513, "eval_samples_per_second": 8.343, "eval_steps_per_second": 1.059, "eval_wer": 0.9964082609997007, "step": 112 }, { "epoch": 14.9, "learning_rate": 0.0003756962025316456, "loss": 2.6359, "step": 120 }, { "epoch": 14.9, "eval_loss": 3.8528494834899902, "eval_runtime": 7.555, "eval_samples_per_second": 8.339, "eval_steps_per_second": 1.059, "eval_wer": 0.9985034420832086, "step": 120 }, { "epoch": 15.9, "eval_loss": 3.4155988693237305, "eval_runtime": 7.5157, "eval_samples_per_second": 8.382, "eval_steps_per_second": 1.064, "eval_wer": 0.9973061957497755, "step": 128 }, { "epoch": 16.23, "learning_rate": 0.0003731645569620253, "loss": 3.1964, "step": 130 }, { "epoch": 16.9, "eval_loss": 3.374109983444214, "eval_runtime": 7.5568, "eval_samples_per_second": 8.337, "eval_steps_per_second": 1.059, "eval_wer": 0.996707572583059, "step": 136 }, { "epoch": 17.45, "learning_rate": 0.0003706329113924051, "loss": 2.4962, "step": 140 }, { "epoch": 17.9, "eval_loss": 3.6131513118743896, "eval_runtime": 7.535, "eval_samples_per_second": 8.361, "eval_steps_per_second": 1.062, "eval_wer": 0.9922178988326849, "step": 144 }, { "epoch": 18.68, "learning_rate": 0.0003681012658227848, "loss": 2.4907, "step": 150 }, { "epoch": 18.9, "eval_loss": 3.5363380908966064, "eval_runtime": 7.513, "eval_samples_per_second": 8.385, "eval_steps_per_second": 1.065, "eval_wer": 0.9958096378329842, "step": 152 }, { "epoch": 19.9, "learning_rate": 0.0003655696202531646, "loss": 2.413, "step": 160 }, { "epoch": 19.9, "eval_loss": 3.6351335048675537, "eval_runtime": 7.5038, "eval_samples_per_second": 8.396, "eval_steps_per_second": 1.066, "eval_wer": 0.9928165219994014, "step": 160 }, { "epoch": 20.9, "eval_loss": 3.6112582683563232, "eval_runtime": 7.5044, "eval_samples_per_second": 8.395, "eval_steps_per_second": 1.066, "eval_wer": 0.9925172104160431, "step": 168 }, { "epoch": 21.23, "learning_rate": 0.0003630379746835443, "loss": 2.5555, "step": 170 }, { "epoch": 21.9, "eval_loss": 3.7119691371917725, "eval_runtime": 7.5276, "eval_samples_per_second": 8.369, "eval_steps_per_second": 1.063, "eval_wer": 0.9904220293325352, "step": 176 }, { "epoch": 22.45, "learning_rate": 0.00036050632911392407, "loss": 2.2909, "step": 180 }, { "epoch": 22.9, "eval_loss": 3.43344783782959, "eval_runtime": 7.4907, "eval_samples_per_second": 8.41, "eval_steps_per_second": 1.068, "eval_wer": 0.99131996408261, "step": 184 }, { "epoch": 23.68, "learning_rate": 0.00035797468354430384, "loss": 2.298, "step": 190 }, { "epoch": 23.9, "eval_loss": 3.5457568168640137, "eval_runtime": 7.4274, "eval_samples_per_second": 8.482, "eval_steps_per_second": 1.077, "eval_wer": 0.99131996408261, "step": 192 }, { "epoch": 24.9, "learning_rate": 0.00035544303797468356, "loss": 2.2366, "step": 200 }, { "epoch": 24.9, "eval_loss": 3.7104668617248535, "eval_runtime": 7.5469, "eval_samples_per_second": 8.348, "eval_steps_per_second": 1.06, "eval_wer": 0.9898234061658187, "step": 200 }, { "epoch": 25.9, "eval_loss": 4.229844093322754, "eval_runtime": 7.5144, "eval_samples_per_second": 8.384, "eval_steps_per_second": 1.065, "eval_wer": 0.9865309787488776, "step": 208 }, { "epoch": 26.23, "learning_rate": 0.00035291139240506333, "loss": 2.3428, "step": 210 }, { "epoch": 26.9, "eval_loss": 3.3207037448883057, "eval_runtime": 7.5005, "eval_samples_per_second": 8.399, "eval_steps_per_second": 1.067, "eval_wer": 0.9889254714157438, "step": 216 }, { "epoch": 27.45, "learning_rate": 0.00035037974683544304, "loss": 2.2519, "step": 220 }, { "epoch": 27.9, "eval_loss": 3.154818058013916, "eval_runtime": 7.5146, "eval_samples_per_second": 8.384, "eval_steps_per_second": 1.065, "eval_wer": 0.9895240945824604, "step": 224 }, { "epoch": 28.68, "learning_rate": 0.0003478481012658228, "loss": 2.0429, "step": 230 }, { "epoch": 28.9, "eval_loss": 3.7694132328033447, "eval_runtime": 7.5309, "eval_samples_per_second": 8.366, "eval_steps_per_second": 1.062, "eval_wer": 0.9877282250823107, "step": 232 }, { "epoch": 29.9, "learning_rate": 0.0003453164556962026, "loss": 2.1886, "step": 240 }, { "epoch": 29.9, "eval_loss": 3.5341339111328125, "eval_runtime": 7.4895, "eval_samples_per_second": 8.412, "eval_steps_per_second": 1.068, "eval_wer": 0.9877282250823107, "step": 240 }, { "epoch": 30.9, "eval_loss": 3.138820171356201, "eval_runtime": 7.4982, "eval_samples_per_second": 8.402, "eval_steps_per_second": 1.067, "eval_wer": 0.9850344208320863, "step": 248 }, { "epoch": 31.23, "learning_rate": 0.0003427848101265823, "loss": 2.2182, "step": 250 }, { "epoch": 31.9, "eval_loss": 2.938950538635254, "eval_runtime": 7.535, "eval_samples_per_second": 8.361, "eval_steps_per_second": 1.062, "eval_wer": 0.9817419934151451, "step": 256 }, { "epoch": 32.45, "learning_rate": 0.000340253164556962, "loss": 1.9479, "step": 260 }, { "epoch": 32.9, "eval_loss": 3.0059807300567627, "eval_runtime": 7.5148, "eval_samples_per_second": 8.383, "eval_steps_per_second": 1.065, "eval_wer": 0.98263992816522, "step": 264 }, { "epoch": 33.68, "learning_rate": 0.0003377215189873418, "loss": 1.9703, "step": 270 }, { "epoch": 33.9, "eval_loss": 3.257068395614624, "eval_runtime": 7.5032, "eval_samples_per_second": 8.396, "eval_steps_per_second": 1.066, "eval_wer": 0.9787488775815624, "step": 272 }, { "epoch": 34.9, "learning_rate": 0.00033518987341772156, "loss": 1.9385, "step": 280 }, { "epoch": 34.9, "eval_loss": 3.1085846424102783, "eval_runtime": 7.5414, "eval_samples_per_second": 8.354, "eval_steps_per_second": 1.061, "eval_wer": 0.9775516312481293, "step": 280 }, { "epoch": 35.9, "eval_loss": 2.8231420516967773, "eval_runtime": 7.5276, "eval_samples_per_second": 8.369, "eval_steps_per_second": 1.063, "eval_wer": 0.9655791679137983, "step": 288 }, { "epoch": 36.23, "learning_rate": 0.0003326582278481013, "loss": 2.0297, "step": 290 }, { "epoch": 36.9, "eval_loss": 2.8960585594177246, "eval_runtime": 7.4607, "eval_samples_per_second": 8.444, "eval_steps_per_second": 1.072, "eval_wer": 0.9667764142472314, "step": 296 }, { "epoch": 37.45, "learning_rate": 0.000330126582278481, "loss": 1.8406, "step": 300 }, { "epoch": 37.9, "eval_loss": 2.882859706878662, "eval_runtime": 7.4625, "eval_samples_per_second": 8.442, "eval_steps_per_second": 1.072, "eval_wer": 0.9670757258305896, "step": 304 }, { "epoch": 38.68, "learning_rate": 0.00032759493670886077, "loss": 1.8707, "step": 310 }, { "epoch": 38.9, "eval_loss": 3.1193575859069824, "eval_runtime": 7.5322, "eval_samples_per_second": 8.364, "eval_steps_per_second": 1.062, "eval_wer": 0.9583956899131997, "step": 312 }, { "epoch": 39.9, "learning_rate": 0.00032506329113924054, "loss": 1.7798, "step": 320 }, { "epoch": 39.9, "eval_loss": 3.0685505867004395, "eval_runtime": 7.5634, "eval_samples_per_second": 8.33, "eval_steps_per_second": 1.058, "eval_wer": 0.959892247829991, "step": 320 }, { "epoch": 40.9, "eval_loss": 2.727964162826538, "eval_runtime": 7.4937, "eval_samples_per_second": 8.407, "eval_steps_per_second": 1.068, "eval_wer": 0.9527087698293923, "step": 328 }, { "epoch": 41.23, "learning_rate": 0.00032253164556962026, "loss": 1.9163, "step": 330 }, { "epoch": 41.9, "eval_loss": 2.6320998668670654, "eval_runtime": 7.5136, "eval_samples_per_second": 8.385, "eval_steps_per_second": 1.065, "eval_wer": 0.9476204729123017, "step": 336 }, { "epoch": 42.45, "learning_rate": 0.00032, "loss": 1.7248, "step": 340 }, { "epoch": 42.9, "eval_loss": 2.6813271045684814, "eval_runtime": 7.5252, "eval_samples_per_second": 8.372, "eval_steps_per_second": 1.063, "eval_wer": 0.9413349296617779, "step": 344 }, { "epoch": 43.68, "learning_rate": 0.00031746835443037974, "loss": 1.7602, "step": 350 }, { "epoch": 43.9, "eval_loss": 2.725241184234619, "eval_runtime": 7.544, "eval_samples_per_second": 8.351, "eval_steps_per_second": 1.06, "eval_wer": 0.9419335528284944, "step": 352 }, { "epoch": 44.9, "learning_rate": 0.0003149367088607595, "loss": 1.7357, "step": 360 }, { "epoch": 44.9, "eval_loss": 3.03348708152771, "eval_runtime": 7.4787, "eval_samples_per_second": 8.424, "eval_steps_per_second": 1.07, "eval_wer": 0.9398383717449865, "step": 360 }, { "epoch": 45.9, "eval_loss": 2.8731963634490967, "eval_runtime": 7.5025, "eval_samples_per_second": 8.397, "eval_steps_per_second": 1.066, "eval_wer": 0.9344507632445376, "step": 368 }, { "epoch": 46.23, "learning_rate": 0.0003124050632911393, "loss": 1.7997, "step": 370 }, { "epoch": 46.9, "eval_loss": 2.7708871364593506, "eval_runtime": 7.5081, "eval_samples_per_second": 8.391, "eval_steps_per_second": 1.066, "eval_wer": 0.9353486979946124, "step": 376 }, { "epoch": 47.45, "learning_rate": 0.000309873417721519, "loss": 1.6268, "step": 380 }, { "epoch": 47.9, "eval_loss": 2.768134832382202, "eval_runtime": 7.5125, "eval_samples_per_second": 8.386, "eval_steps_per_second": 1.065, "eval_wer": 0.9278659084106555, "step": 384 }, { "epoch": 48.68, "learning_rate": 0.0003073417721518987, "loss": 1.6527, "step": 390 }, { "epoch": 48.9, "eval_loss": 2.825923204421997, "eval_runtime": 7.512, "eval_samples_per_second": 8.387, "eval_steps_per_second": 1.065, "eval_wer": 0.9302604010775217, "step": 392 }, { "epoch": 49.9, "learning_rate": 0.0003048101265822785, "loss": 1.5715, "step": 400 }, { "epoch": 49.9, "eval_loss": 2.8840715885162354, "eval_runtime": 7.525, "eval_samples_per_second": 8.372, "eval_steps_per_second": 1.063, "eval_wer": 0.9341514516611793, "step": 400 }, { "epoch": 50.9, "eval_loss": 2.794407367706299, "eval_runtime": 7.4742, "eval_samples_per_second": 8.429, "eval_steps_per_second": 1.07, "eval_wer": 0.9221789883268483, "step": 408 }, { "epoch": 51.23, "learning_rate": 0.00030227848101265826, "loss": 1.6903, "step": 410 }, { "epoch": 51.9, "eval_loss": 3.1597204208374023, "eval_runtime": 7.5, "eval_samples_per_second": 8.4, "eval_steps_per_second": 1.067, "eval_wer": 0.9203831188266985, "step": 416 }, { "epoch": 52.45, "learning_rate": 0.000299746835443038, "loss": 1.5722, "step": 420 }, { "epoch": 52.9, "eval_loss": 2.559480667114258, "eval_runtime": 7.4456, "eval_samples_per_second": 8.461, "eval_steps_per_second": 1.074, "eval_wer": 0.9212810535767734, "step": 424 }, { "epoch": 53.68, "learning_rate": 0.0002972151898734177, "loss": 1.539, "step": 430 }, { "epoch": 53.9, "eval_loss": 2.816045045852661, "eval_runtime": 7.4737, "eval_samples_per_second": 8.43, "eval_steps_per_second": 1.07, "eval_wer": 0.9117030829093086, "step": 432 }, { "epoch": 54.9, "learning_rate": 0.0002946835443037975, "loss": 1.538, "step": 440 }, { "epoch": 54.9, "eval_loss": 2.565622091293335, "eval_runtime": 7.4709, "eval_samples_per_second": 8.433, "eval_steps_per_second": 1.071, "eval_wer": 0.9167913798263992, "step": 440 }, { "epoch": 55.9, "eval_loss": 2.9076685905456543, "eval_runtime": 7.5072, "eval_samples_per_second": 8.392, "eval_steps_per_second": 1.066, "eval_wer": 0.9075127207422927, "step": 448 }, { "epoch": 56.23, "learning_rate": 0.00029215189873417724, "loss": 1.624, "step": 450 }, { "epoch": 56.9, "eval_loss": 2.872545003890991, "eval_runtime": 7.4843, "eval_samples_per_second": 8.418, "eval_steps_per_second": 1.069, "eval_wer": 0.8985333732415445, "step": 456 }, { "epoch": 57.45, "learning_rate": 0.00028962025316455695, "loss": 1.5052, "step": 460 }, { "epoch": 57.9, "eval_loss": 2.621701240539551, "eval_runtime": 7.5267, "eval_samples_per_second": 8.37, "eval_steps_per_second": 1.063, "eval_wer": 0.8901526489075127, "step": 464 }, { "epoch": 58.68, "learning_rate": 0.0002870886075949367, "loss": 1.4367, "step": 470 }, { "epoch": 58.9, "eval_loss": 2.5042622089385986, "eval_runtime": 7.5036, "eval_samples_per_second": 8.396, "eval_steps_per_second": 1.066, "eval_wer": 0.8976354384914697, "step": 472 }, { "epoch": 59.9, "learning_rate": 0.0002845569620253165, "loss": 1.4814, "step": 480 }, { "epoch": 59.9, "eval_loss": 3.090513229370117, "eval_runtime": 7.5001, "eval_samples_per_second": 8.4, "eval_steps_per_second": 1.067, "eval_wer": 0.8949416342412452, "step": 480 }, { "epoch": 60.9, "eval_loss": 2.7212915420532227, "eval_runtime": 7.5374, "eval_samples_per_second": 8.358, "eval_steps_per_second": 1.061, "eval_wer": 0.899131996408261, "step": 488 }, { "epoch": 61.23, "learning_rate": 0.0002820253164556962, "loss": 1.5696, "step": 490 }, { "epoch": 61.9, "eval_loss": 2.6882176399230957, "eval_runtime": 7.5273, "eval_samples_per_second": 8.37, "eval_steps_per_second": 1.063, "eval_wer": 0.8913498952409459, "step": 496 }, { "epoch": 62.45, "learning_rate": 0.00027949367088607593, "loss": 1.4408, "step": 500 }, { "epoch": 62.9, "eval_loss": 2.732247829437256, "eval_runtime": 7.5125, "eval_samples_per_second": 8.386, "eval_steps_per_second": 1.065, "eval_wer": 0.8736905118228075, "step": 504 }, { "epoch": 63.68, "learning_rate": 0.0002769620253164557, "loss": 1.4065, "step": 510 }, { "epoch": 63.9, "eval_loss": 2.7024385929107666, "eval_runtime": 7.5194, "eval_samples_per_second": 8.378, "eval_steps_per_second": 1.064, "eval_wer": 0.8826698593235558, "step": 512 }, { "epoch": 64.9, "learning_rate": 0.00027443037974683547, "loss": 1.3989, "step": 520 }, { "epoch": 64.9, "eval_loss": 2.6808624267578125, "eval_runtime": 7.5114, "eval_samples_per_second": 8.387, "eval_steps_per_second": 1.065, "eval_wer": 0.8805746782400479, "step": 520 }, { "epoch": 65.9, "eval_loss": 2.6340062618255615, "eval_runtime": 7.4638, "eval_samples_per_second": 8.441, "eval_steps_per_second": 1.072, "eval_wer": 0.8865609099072134, "step": 528 }, { "epoch": 66.23, "learning_rate": 0.0002718987341772152, "loss": 1.5102, "step": 530 }, { "epoch": 66.9, "eval_loss": 2.865128993988037, "eval_runtime": 7.4899, "eval_samples_per_second": 8.411, "eval_steps_per_second": 1.068, "eval_wer": 0.885064351990422, "step": 536 }, { "epoch": 67.45, "learning_rate": 0.00026936708860759496, "loss": 1.4158, "step": 540 }, { "epoch": 67.9, "eval_loss": 2.7928247451782227, "eval_runtime": 7.4996, "eval_samples_per_second": 8.401, "eval_steps_per_second": 1.067, "eval_wer": 0.8757856929063155, "step": 544 }, { "epoch": 68.68, "learning_rate": 0.0002668354430379747, "loss": 1.3322, "step": 550 }, { "epoch": 68.9, "eval_loss": 2.754878282546997, "eval_runtime": 7.529, "eval_samples_per_second": 8.368, "eval_steps_per_second": 1.063, "eval_wer": 0.8799760550733313, "step": 552 }, { "epoch": 69.9, "learning_rate": 0.00026430379746835445, "loss": 1.4226, "step": 560 }, { "epoch": 69.9, "eval_loss": 2.6617932319641113, "eval_runtime": 7.6013, "eval_samples_per_second": 8.288, "eval_steps_per_second": 1.052, "eval_wer": 0.8775815624064651, "step": 560 }, { "epoch": 70.9, "eval_loss": 2.6936562061309814, "eval_runtime": 7.5105, "eval_samples_per_second": 8.388, "eval_steps_per_second": 1.065, "eval_wer": 0.8650104759054176, "step": 568 }, { "epoch": 71.23, "learning_rate": 0.0002617721518987342, "loss": 1.4735, "step": 570 }, { "epoch": 71.9, "eval_loss": 2.628467321395874, "eval_runtime": 7.5847, "eval_samples_per_second": 8.306, "eval_steps_per_second": 1.055, "eval_wer": 0.8668063454055672, "step": 576 }, { "epoch": 72.45, "learning_rate": 0.00025924050632911394, "loss": 1.338, "step": 580 }, { "epoch": 72.9, "eval_loss": 2.5627574920654297, "eval_runtime": 7.5097, "eval_samples_per_second": 8.389, "eval_steps_per_second": 1.065, "eval_wer": 0.8668063454055672, "step": 584 }, { "epoch": 73.68, "learning_rate": 0.00025670886075949365, "loss": 1.335, "step": 590 }, { "epoch": 73.9, "eval_loss": 2.4783389568328857, "eval_runtime": 7.4904, "eval_samples_per_second": 8.411, "eval_steps_per_second": 1.068, "eval_wer": 0.8608201137384017, "step": 592 }, { "epoch": 74.9, "learning_rate": 0.0002541772151898734, "loss": 1.3433, "step": 600 }, { "epoch": 74.9, "eval_loss": 2.654869318008423, "eval_runtime": 7.5287, "eval_samples_per_second": 8.368, "eval_steps_per_second": 1.063, "eval_wer": 0.8605208021550435, "step": 600 }, { "epoch": 75.9, "eval_loss": 2.3850927352905273, "eval_runtime": 7.5228, "eval_samples_per_second": 8.375, "eval_steps_per_second": 1.063, "eval_wer": 0.853636635737803, "step": 608 }, { "epoch": 76.23, "learning_rate": 0.0002516455696202532, "loss": 1.4341, "step": 610 }, { "epoch": 76.9, "eval_loss": 2.6057424545288086, "eval_runtime": 7.5058, "eval_samples_per_second": 8.394, "eval_steps_per_second": 1.066, "eval_wer": 0.8641125411553428, "step": 616 }, { "epoch": 77.45, "learning_rate": 0.0002491139240506329, "loss": 1.3036, "step": 620 }, { "epoch": 77.9, "eval_loss": 2.414400100708008, "eval_runtime": 7.5426, "eval_samples_per_second": 8.353, "eval_steps_per_second": 1.061, "eval_wer": 0.8614187369051183, "step": 624 }, { "epoch": 78.68, "learning_rate": 0.00024658227848101263, "loss": 1.2617, "step": 630 }, { "epoch": 78.9, "eval_loss": 2.500195264816284, "eval_runtime": 7.4699, "eval_samples_per_second": 8.434, "eval_steps_per_second": 1.071, "eval_wer": 0.8596228674049686, "step": 632 }, { "epoch": 79.9, "learning_rate": 0.00024405063291139243, "loss": 1.308, "step": 640 }, { "epoch": 79.9, "eval_loss": 2.492858648300171, "eval_runtime": 7.51, "eval_samples_per_second": 8.389, "eval_steps_per_second": 1.065, "eval_wer": 0.8575276863214606, "step": 640 }, { "epoch": 80.9, "eval_loss": 2.6992645263671875, "eval_runtime": 7.5434, "eval_samples_per_second": 8.352, "eval_steps_per_second": 1.061, "eval_wer": 0.8461538461538461, "step": 648 }, { "epoch": 81.23, "learning_rate": 0.00024151898734177217, "loss": 1.3877, "step": 650 }, { "epoch": 81.9, "eval_loss": 2.587432861328125, "eval_runtime": 7.5224, "eval_samples_per_second": 8.375, "eval_steps_per_second": 1.063, "eval_wer": 0.8509428314875785, "step": 656 }, { "epoch": 82.45, "learning_rate": 0.0002389873417721519, "loss": 1.2553, "step": 660 }, { "epoch": 82.9, "eval_loss": 2.6430294513702393, "eval_runtime": 7.5162, "eval_samples_per_second": 8.382, "eval_steps_per_second": 1.064, "eval_wer": 0.853636635737803, "step": 664 }, { "epoch": 83.68, "learning_rate": 0.00023645569620253169, "loss": 1.211, "step": 670 }, { "epoch": 83.9, "eval_loss": 3.0369396209716797, "eval_runtime": 7.4884, "eval_samples_per_second": 8.413, "eval_steps_per_second": 1.068, "eval_wer": 0.8497455851541454, "step": 672 }, { "epoch": 84.9, "learning_rate": 0.0002339240506329114, "loss": 1.2647, "step": 680 }, { "epoch": 84.9, "eval_loss": 2.701183795928955, "eval_runtime": 7.5489, "eval_samples_per_second": 8.346, "eval_steps_per_second": 1.06, "eval_wer": 0.84375935348698, "step": 680 }, { "epoch": 85.9, "eval_loss": 2.5128839015960693, "eval_runtime": 7.5032, "eval_samples_per_second": 8.396, "eval_steps_per_second": 1.066, "eval_wer": 0.8581263094881771, "step": 688 }, { "epoch": 86.23, "learning_rate": 0.00023139240506329115, "loss": 1.3168, "step": 690 }, { "epoch": 86.9, "eval_loss": 2.512349843978882, "eval_runtime": 7.5078, "eval_samples_per_second": 8.391, "eval_steps_per_second": 1.066, "eval_wer": 0.8452559114037713, "step": 696 }, { "epoch": 87.45, "learning_rate": 0.00022886075949367092, "loss": 1.1997, "step": 700 }, { "epoch": 87.9, "eval_loss": 2.459209442138672, "eval_runtime": 7.507, "eval_samples_per_second": 8.392, "eval_steps_per_second": 1.066, "eval_wer": 0.850344208320862, "step": 704 }, { "epoch": 88.68, "learning_rate": 0.00022632911392405066, "loss": 1.1866, "step": 710 }, { "epoch": 88.9, "eval_loss": 2.6305787563323975, "eval_runtime": 7.5109, "eval_samples_per_second": 8.388, "eval_steps_per_second": 1.065, "eval_wer": 0.8419634839868303, "step": 712 }, { "epoch": 89.9, "learning_rate": 0.00022379746835443038, "loss": 1.2396, "step": 720 }, { "epoch": 89.9, "eval_loss": 2.4730277061462402, "eval_runtime": 7.5381, "eval_samples_per_second": 8.358, "eval_steps_per_second": 1.061, "eval_wer": 0.8458545345704879, "step": 720 }, { "epoch": 90.9, "eval_loss": 2.6145691871643066, "eval_runtime": 7.4985, "eval_samples_per_second": 8.402, "eval_steps_per_second": 1.067, "eval_wer": 0.8488476504040706, "step": 728 }, { "epoch": 91.23, "learning_rate": 0.00022126582278481012, "loss": 1.3184, "step": 730 }, { "epoch": 91.9, "eval_loss": 2.620443105697632, "eval_runtime": 7.5157, "eval_samples_per_second": 8.383, "eval_steps_per_second": 1.064, "eval_wer": 0.8422627955701886, "step": 736 }, { "epoch": 92.45, "learning_rate": 0.0002187341772151899, "loss": 1.1704, "step": 740 }, { "epoch": 92.9, "eval_loss": 2.8895633220672607, "eval_runtime": 7.493, "eval_samples_per_second": 8.408, "eval_steps_per_second": 1.068, "eval_wer": 0.8440586650703382, "step": 744 }, { "epoch": 93.68, "learning_rate": 0.00021620253164556964, "loss": 1.1436, "step": 750 }, { "epoch": 93.9, "eval_loss": 2.997091770172119, "eval_runtime": 7.5434, "eval_samples_per_second": 8.352, "eval_steps_per_second": 1.061, "eval_wer": 0.8389703681532475, "step": 752 }, { "epoch": 94.9, "learning_rate": 0.00021367088607594935, "loss": 1.1716, "step": 760 }, { "epoch": 94.9, "eval_loss": 2.729346513748169, "eval_runtime": 7.4914, "eval_samples_per_second": 8.41, "eval_steps_per_second": 1.068, "eval_wer": 0.8479497156539958, "step": 760 }, { "epoch": 95.9, "eval_loss": 2.9620306491851807, "eval_runtime": 7.4885, "eval_samples_per_second": 8.413, "eval_steps_per_second": 1.068, "eval_wer": 0.8425621071535468, "step": 768 }, { "epoch": 96.23, "learning_rate": 0.00021113924050632915, "loss": 1.2487, "step": 770 }, { "epoch": 96.9, "eval_loss": 2.6880440711975098, "eval_runtime": 7.5581, "eval_samples_per_second": 8.335, "eval_steps_per_second": 1.058, "eval_wer": 0.8332834480694403, "step": 776 }, { "epoch": 97.45, "learning_rate": 0.00020860759493670887, "loss": 1.118, "step": 780 }, { "epoch": 97.9, "eval_loss": 2.6754331588745117, "eval_runtime": 7.4991, "eval_samples_per_second": 8.401, "eval_steps_per_second": 1.067, "eval_wer": 0.8320862017360072, "step": 784 }, { "epoch": 98.68, "learning_rate": 0.0002060759493670886, "loss": 1.186, "step": 790 }, { "epoch": 98.9, "eval_loss": 2.6924636363983154, "eval_runtime": 7.4702, "eval_samples_per_second": 8.433, "eval_steps_per_second": 1.071, "eval_wer": 0.8341813828195151, "step": 792 }, { "epoch": 99.9, "learning_rate": 0.00020354430379746838, "loss": 1.1373, "step": 800 }, { "epoch": 99.9, "eval_loss": 2.9206714630126953, "eval_runtime": 7.4921, "eval_samples_per_second": 8.409, "eval_steps_per_second": 1.068, "eval_wer": 0.8338820712361569, "step": 800 }, { "epoch": 100.9, "eval_loss": 2.8559393882751465, "eval_runtime": 7.4976, "eval_samples_per_second": 8.403, "eval_steps_per_second": 1.067, "eval_wer": 0.8353786291529482, "step": 808 }, { "epoch": 101.23, "learning_rate": 0.00020101265822784813, "loss": 1.2086, "step": 810 }, { "epoch": 101.9, "eval_loss": 2.977409601211548, "eval_runtime": 7.6531, "eval_samples_per_second": 8.232, "eval_steps_per_second": 1.045, "eval_wer": 0.8335827596527986, "step": 816 }, { "epoch": 102.45, "learning_rate": 0.00019848101265822785, "loss": 1.1227, "step": 820 }, { "epoch": 102.9, "eval_loss": 3.0108087062835693, "eval_runtime": 7.4953, "eval_samples_per_second": 8.405, "eval_steps_per_second": 1.067, "eval_wer": 0.8192158036516013, "step": 824 }, { "epoch": 103.68, "learning_rate": 0.00019594936708860762, "loss": 1.1446, "step": 830 }, { "epoch": 103.9, "eval_loss": 2.8996899127960205, "eval_runtime": 7.5008, "eval_samples_per_second": 8.399, "eval_steps_per_second": 1.067, "eval_wer": 0.8269979048189164, "step": 832 }, { "epoch": 104.9, "learning_rate": 0.00019341772151898736, "loss": 1.1142, "step": 840 }, { "epoch": 104.9, "eval_loss": 2.662632703781128, "eval_runtime": 7.5291, "eval_samples_per_second": 8.368, "eval_steps_per_second": 1.063, "eval_wer": 0.8305896438192159, "step": 840 }, { "epoch": 105.9, "eval_loss": 2.7736780643463135, "eval_runtime": 7.5386, "eval_samples_per_second": 8.357, "eval_steps_per_second": 1.061, "eval_wer": 0.8195151152349596, "step": 848 }, { "epoch": 106.23, "learning_rate": 0.0001908860759493671, "loss": 1.1665, "step": 850 }, { "epoch": 106.9, "eval_loss": 2.544724225997925, "eval_runtime": 7.4986, "eval_samples_per_second": 8.402, "eval_steps_per_second": 1.067, "eval_wer": 0.8186171804848847, "step": 856 }, { "epoch": 107.45, "learning_rate": 0.00018835443037974685, "loss": 1.1, "step": 860 }, { "epoch": 107.9, "eval_loss": 2.4471564292907715, "eval_runtime": 7.4869, "eval_samples_per_second": 8.415, "eval_steps_per_second": 1.069, "eval_wer": 0.8311882669859324, "step": 864 }, { "epoch": 108.68, "learning_rate": 0.0001858227848101266, "loss": 1.0674, "step": 870 }, { "epoch": 108.9, "eval_loss": 2.406235933303833, "eval_runtime": 7.5313, "eval_samples_per_second": 8.365, "eval_steps_per_second": 1.062, "eval_wer": 0.8225082310685423, "step": 872 }, { "epoch": 109.9, "learning_rate": 0.00018329113924050634, "loss": 1.0556, "step": 880 }, { "epoch": 109.9, "eval_loss": 2.4098057746887207, "eval_runtime": 7.4614, "eval_samples_per_second": 8.444, "eval_steps_per_second": 1.072, "eval_wer": 0.8246034121520502, "step": 880 }, { "epoch": 110.9, "eval_loss": 2.344670534133911, "eval_runtime": 7.5006, "eval_samples_per_second": 8.399, "eval_steps_per_second": 1.067, "eval_wer": 0.8255013469021251, "step": 888 }, { "epoch": 111.23, "learning_rate": 0.00018075949367088608, "loss": 1.1834, "step": 890 }, { "epoch": 111.9, "eval_loss": 2.557053804397583, "eval_runtime": 7.5289, "eval_samples_per_second": 8.368, "eval_steps_per_second": 1.063, "eval_wer": 0.8066447171505537, "step": 896 }, { "epoch": 112.45, "learning_rate": 0.00017822784810126582, "loss": 1.0533, "step": 900 }, { "epoch": 112.9, "eval_loss": 2.5983164310455322, "eval_runtime": 7.5273, "eval_samples_per_second": 8.369, "eval_steps_per_second": 1.063, "eval_wer": 0.8150254414845854, "step": 904 }, { "epoch": 113.68, "learning_rate": 0.0001756962025316456, "loss": 1.101, "step": 910 }, { "epoch": 113.9, "eval_loss": 2.6911237239837646, "eval_runtime": 7.6253, "eval_samples_per_second": 8.262, "eval_steps_per_second": 1.049, "eval_wer": 0.7949715653995809, "step": 912 }, { "epoch": 114.9, "learning_rate": 0.0001731645569620253, "loss": 1.0633, "step": 920 }, { "epoch": 114.9, "eval_loss": 2.573255777359009, "eval_runtime": 7.4999, "eval_samples_per_second": 8.4, "eval_steps_per_second": 1.067, "eval_wer": 0.8078419634839868, "step": 920 }, { "epoch": 115.9, "eval_loss": 2.5812604427337646, "eval_runtime": 7.5652, "eval_samples_per_second": 8.328, "eval_steps_per_second": 1.057, "eval_wer": 0.8192158036516013, "step": 928 }, { "epoch": 116.23, "learning_rate": 0.00017063291139240508, "loss": 1.1512, "step": 930 }, { "epoch": 116.9, "eval_loss": 2.623731851577759, "eval_runtime": 7.5059, "eval_samples_per_second": 8.393, "eval_steps_per_second": 1.066, "eval_wer": 0.8135288835677941, "step": 936 }, { "epoch": 117.45, "learning_rate": 0.00016810126582278483, "loss": 1.0317, "step": 940 }, { "epoch": 117.9, "eval_loss": 2.5565974712371826, "eval_runtime": 7.5083, "eval_samples_per_second": 8.391, "eval_steps_per_second": 1.065, "eval_wer": 0.8030529781502544, "step": 944 }, { "epoch": 118.68, "learning_rate": 0.00016556962025316457, "loss": 1.0117, "step": 950 }, { "epoch": 118.9, "eval_loss": 2.5485475063323975, "eval_runtime": 7.4871, "eval_samples_per_second": 8.414, "eval_steps_per_second": 1.069, "eval_wer": 0.8015564202334631, "step": 952 }, { "epoch": 119.9, "learning_rate": 0.00016303797468354431, "loss": 1.0556, "step": 960 }, { "epoch": 119.9, "eval_loss": 2.4270551204681396, "eval_runtime": 7.5607, "eval_samples_per_second": 8.333, "eval_steps_per_second": 1.058, "eval_wer": 0.7979646812331638, "step": 960 }, { "epoch": 120.9, "eval_loss": 2.6578969955444336, "eval_runtime": 7.5132, "eval_samples_per_second": 8.385, "eval_steps_per_second": 1.065, "eval_wer": 0.7940736306495061, "step": 968 }, { "epoch": 121.23, "learning_rate": 0.00016050632911392406, "loss": 1.1204, "step": 970 }, { "epoch": 121.9, "eval_loss": 2.6150407791137695, "eval_runtime": 7.467, "eval_samples_per_second": 8.437, "eval_steps_per_second": 1.071, "eval_wer": 0.7943729422328644, "step": 976 }, { "epoch": 122.45, "learning_rate": 0.0001579746835443038, "loss": 1.0378, "step": 980 }, { "epoch": 122.9, "eval_loss": 2.549945116043091, "eval_runtime": 7.4938, "eval_samples_per_second": 8.407, "eval_steps_per_second": 1.068, "eval_wer": 0.8024543549835379, "step": 984 }, { "epoch": 123.68, "learning_rate": 0.00015544303797468355, "loss": 1.0213, "step": 990 }, { "epoch": 123.9, "eval_loss": 2.72731876373291, "eval_runtime": 7.5219, "eval_samples_per_second": 8.376, "eval_steps_per_second": 1.064, "eval_wer": 0.7937743190661478, "step": 992 }, { "epoch": 124.9, "learning_rate": 0.0001529113924050633, "loss": 1.0247, "step": 1000 }, { "epoch": 124.9, "eval_loss": 2.752246618270874, "eval_runtime": 7.5067, "eval_samples_per_second": 8.392, "eval_steps_per_second": 1.066, "eval_wer": 0.7862915294821909, "step": 1000 }, { "epoch": 125.9, "eval_loss": 2.916325569152832, "eval_runtime": 7.5407, "eval_samples_per_second": 8.355, "eval_steps_per_second": 1.061, "eval_wer": 0.797066746483089, "step": 1008 }, { "epoch": 126.23, "learning_rate": 0.00015037974683544303, "loss": 1.0939, "step": 1010 }, { "epoch": 126.9, "eval_loss": 2.622096300125122, "eval_runtime": 7.5442, "eval_samples_per_second": 8.351, "eval_steps_per_second": 1.06, "eval_wer": 0.789583956899132, "step": 1016 }, { "epoch": 127.45, "learning_rate": 0.0001478481012658228, "loss": 1.0399, "step": 1020 }, { "epoch": 127.9, "eval_loss": 2.9418468475341797, "eval_runtime": 7.4804, "eval_samples_per_second": 8.422, "eval_steps_per_second": 1.069, "eval_wer": 0.80035917390003, "step": 1024 }, { "epoch": 128.68, "learning_rate": 0.00014531645569620252, "loss": 1.0233, "step": 1030 }, { "epoch": 128.9, "eval_loss": 2.755779266357422, "eval_runtime": 7.4931, "eval_samples_per_second": 8.408, "eval_steps_per_second": 1.068, "eval_wer": 0.7856929063154744, "step": 1032 }, { "epoch": 129.9, "learning_rate": 0.0001427848101265823, "loss": 0.9702, "step": 1040 }, { "epoch": 129.9, "eval_loss": 2.574506998062134, "eval_runtime": 7.5545, "eval_samples_per_second": 8.339, "eval_steps_per_second": 1.059, "eval_wer": 0.7904818916492068, "step": 1040 }, { "epoch": 130.9, "eval_loss": 2.6720142364501953, "eval_runtime": 7.4821, "eval_samples_per_second": 8.42, "eval_steps_per_second": 1.069, "eval_wer": 0.7898832684824902, "step": 1048 }, { "epoch": 131.23, "learning_rate": 0.00014025316455696204, "loss": 1.0676, "step": 1050 }, { "epoch": 131.9, "eval_loss": 2.690053701400757, "eval_runtime": 7.499, "eval_samples_per_second": 8.401, "eval_steps_per_second": 1.067, "eval_wer": 0.8021550434001796, "step": 1056 }, { "epoch": 132.45, "learning_rate": 0.00013772151898734178, "loss": 1.0044, "step": 1060 }, { "epoch": 132.9, "eval_loss": 2.659367561340332, "eval_runtime": 7.5012, "eval_samples_per_second": 8.399, "eval_steps_per_second": 1.066, "eval_wer": 0.7773121819814427, "step": 1064 }, { "epoch": 133.68, "learning_rate": 0.00013518987341772153, "loss": 1.0276, "step": 1070 }, { "epoch": 133.9, "eval_loss": 2.4738552570343018, "eval_runtime": 7.4676, "eval_samples_per_second": 8.436, "eval_steps_per_second": 1.071, "eval_wer": 0.7931756958994313, "step": 1072 }, { "epoch": 134.9, "learning_rate": 0.00013265822784810127, "loss": 0.949, "step": 1080 }, { "epoch": 134.9, "eval_loss": 2.539825677871704, "eval_runtime": 7.5085, "eval_samples_per_second": 8.391, "eval_steps_per_second": 1.065, "eval_wer": 0.775516312481293, "step": 1080 }, { "epoch": 135.9, "eval_loss": 2.6266517639160156, "eval_runtime": 7.4936, "eval_samples_per_second": 8.407, "eval_steps_per_second": 1.068, "eval_wer": 0.7797066746483089, "step": 1088 }, { "epoch": 136.23, "learning_rate": 0.00013012658227848101, "loss": 1.0508, "step": 1090 }, { "epoch": 136.9, "eval_loss": 2.482872247695923, "eval_runtime": 7.5101, "eval_samples_per_second": 8.389, "eval_steps_per_second": 1.065, "eval_wer": 0.772223885064352, "step": 1096 }, { "epoch": 137.45, "learning_rate": 0.00012759493670886076, "loss": 0.9937, "step": 1100 }, { "epoch": 137.9, "eval_loss": 2.4289324283599854, "eval_runtime": 7.4931, "eval_samples_per_second": 8.408, "eval_steps_per_second": 1.068, "eval_wer": 0.777611493564801, "step": 1104 }, { "epoch": 138.68, "learning_rate": 0.0001250632911392405, "loss": 0.9677, "step": 1110 }, { "epoch": 138.9, "eval_loss": 2.584481954574585, "eval_runtime": 7.4633, "eval_samples_per_second": 8.441, "eval_steps_per_second": 1.072, "eval_wer": 0.7815025441484585, "step": 1112 }, { "epoch": 139.9, "learning_rate": 0.00012253164556962027, "loss": 1.0115, "step": 1120 }, { "epoch": 139.9, "eval_loss": 2.713249921798706, "eval_runtime": 7.4798, "eval_samples_per_second": 8.423, "eval_steps_per_second": 1.07, "eval_wer": 0.7704280155642024, "step": 1120 }, { "epoch": 140.9, "eval_loss": 2.429744005203247, "eval_runtime": 7.5782, "eval_samples_per_second": 8.313, "eval_steps_per_second": 1.056, "eval_wer": 0.7835977252319665, "step": 1128 }, { "epoch": 141.23, "learning_rate": 0.00012, "loss": 1.049, "step": 1130 }, { "epoch": 141.9, "eval_loss": 2.342973232269287, "eval_runtime": 7.5821, "eval_samples_per_second": 8.309, "eval_steps_per_second": 1.055, "eval_wer": 0.7829991020652499, "step": 1136 }, { "epoch": 142.45, "learning_rate": 0.00011746835443037976, "loss": 0.9412, "step": 1140 }, { "epoch": 142.9, "eval_loss": 2.6201610565185547, "eval_runtime": 7.5078, "eval_samples_per_second": 8.391, "eval_steps_per_second": 1.066, "eval_wer": 0.7698293923974858, "step": 1144 }, { "epoch": 143.68, "learning_rate": 0.00011493670886075949, "loss": 0.9647, "step": 1150 }, { "epoch": 143.9, "eval_loss": 2.507197141647339, "eval_runtime": 7.5411, "eval_samples_per_second": 8.354, "eval_steps_per_second": 1.061, "eval_wer": 0.7710266387309189, "step": 1152 }, { "epoch": 144.9, "learning_rate": 0.00011240506329113925, "loss": 0.9839, "step": 1160 }, { "epoch": 144.9, "eval_loss": 2.364020824432373, "eval_runtime": 7.5081, "eval_samples_per_second": 8.391, "eval_steps_per_second": 1.066, "eval_wer": 0.7752170008979348, "step": 1160 }, { "epoch": 145.9, "eval_loss": 2.3263261318206787, "eval_runtime": 7.4816, "eval_samples_per_second": 8.421, "eval_steps_per_second": 1.069, "eval_wer": 0.7803052978150254, "step": 1168 }, { "epoch": 146.23, "learning_rate": 0.000109873417721519, "loss": 1.0245, "step": 1170 }, { "epoch": 146.9, "eval_loss": 2.4204721450805664, "eval_runtime": 7.4826, "eval_samples_per_second": 8.42, "eval_steps_per_second": 1.069, "eval_wer": 0.7683328344806944, "step": 1176 }, { "epoch": 147.45, "learning_rate": 0.00010734177215189874, "loss": 0.9537, "step": 1180 }, { "epoch": 147.9, "eval_loss": 2.3592872619628906, "eval_runtime": 7.5011, "eval_samples_per_second": 8.399, "eval_steps_per_second": 1.067, "eval_wer": 0.7832984136486082, "step": 1184 }, { "epoch": 148.68, "learning_rate": 0.0001048101265822785, "loss": 0.9787, "step": 1190 }, { "epoch": 148.9, "eval_loss": 2.5318942070007324, "eval_runtime": 7.597, "eval_samples_per_second": 8.293, "eval_steps_per_second": 1.053, "eval_wer": 0.7740197545645017, "step": 1192 }, { "epoch": 149.9, "learning_rate": 0.00010227848101265822, "loss": 0.9443, "step": 1200 }, { "epoch": 149.9, "eval_loss": 2.492283582687378, "eval_runtime": 7.4715, "eval_samples_per_second": 8.432, "eval_steps_per_second": 1.071, "eval_wer": 0.7734211313977851, "step": 1200 }, { "epoch": 150.9, "eval_loss": 2.393569231033325, "eval_runtime": 7.4907, "eval_samples_per_second": 8.41, "eval_steps_per_second": 1.068, "eval_wer": 0.7725231966477103, "step": 1208 }, { "epoch": 151.23, "learning_rate": 9.974683544303798e-05, "loss": 1.0125, "step": 1210 }, { "epoch": 151.9, "eval_loss": 2.4754488468170166, "eval_runtime": 7.5067, "eval_samples_per_second": 8.393, "eval_steps_per_second": 1.066, "eval_wer": 0.761448668063454, "step": 1216 }, { "epoch": 152.45, "learning_rate": 9.721518987341773e-05, "loss": 0.943, "step": 1220 }, { "epoch": 152.9, "eval_loss": 2.434126615524292, "eval_runtime": 7.5336, "eval_samples_per_second": 8.362, "eval_steps_per_second": 1.062, "eval_wer": 0.7701287039808441, "step": 1224 }, { "epoch": 153.68, "learning_rate": 9.468354430379748e-05, "loss": 0.9482, "step": 1230 }, { "epoch": 153.9, "eval_loss": 2.423170328140259, "eval_runtime": 7.517, "eval_samples_per_second": 8.381, "eval_steps_per_second": 1.064, "eval_wer": 0.7698293923974858, "step": 1232 }, { "epoch": 154.9, "learning_rate": 9.215189873417723e-05, "loss": 0.8958, "step": 1240 }, { "epoch": 154.9, "eval_loss": 2.4942498207092285, "eval_runtime": 7.5276, "eval_samples_per_second": 8.369, "eval_steps_per_second": 1.063, "eval_wer": 0.7515713858126309, "step": 1240 }, { "epoch": 155.9, "eval_loss": 2.516117572784424, "eval_runtime": 7.574, "eval_samples_per_second": 8.318, "eval_steps_per_second": 1.056, "eval_wer": 0.7680335228973362, "step": 1248 }, { "epoch": 156.23, "learning_rate": 8.962025316455697e-05, "loss": 1.0073, "step": 1250 }, { "epoch": 156.9, "eval_loss": 2.533907890319824, "eval_runtime": 7.5082, "eval_samples_per_second": 8.391, "eval_steps_per_second": 1.066, "eval_wer": 0.7698293923974858, "step": 1256 }, { "epoch": 157.45, "learning_rate": 8.708860759493672e-05, "loss": 0.9784, "step": 1260 }, { "epoch": 157.9, "eval_loss": 2.4986894130706787, "eval_runtime": 7.5302, "eval_samples_per_second": 8.366, "eval_steps_per_second": 1.062, "eval_wer": 0.7560610595630051, "step": 1264 }, { "epoch": 158.68, "learning_rate": 8.455696202531646e-05, "loss": 0.904, "step": 1270 }, { "epoch": 158.9, "eval_loss": 2.4728736877441406, "eval_runtime": 7.5286, "eval_samples_per_second": 8.368, "eval_steps_per_second": 1.063, "eval_wer": 0.7560610595630051, "step": 1272 }, { "epoch": 159.9, "learning_rate": 8.202531645569622e-05, "loss": 0.9352, "step": 1280 }, { "epoch": 159.9, "eval_loss": 2.4667537212371826, "eval_runtime": 7.4806, "eval_samples_per_second": 8.422, "eval_steps_per_second": 1.069, "eval_wer": 0.7590541753965878, "step": 1280 }, { "epoch": 160.9, "eval_loss": 2.4546968936920166, "eval_runtime": 7.4781, "eval_samples_per_second": 8.425, "eval_steps_per_second": 1.07, "eval_wer": 0.7557617479796468, "step": 1288 }, { "epoch": 161.23, "learning_rate": 7.949367088607596e-05, "loss": 1.0036, "step": 1290 }, { "epoch": 161.9, "eval_loss": 2.6064584255218506, "eval_runtime": 7.5036, "eval_samples_per_second": 8.396, "eval_steps_per_second": 1.066, "eval_wer": 0.764741095480395, "step": 1296 }, { "epoch": 162.45, "learning_rate": 7.69620253164557e-05, "loss": 0.9437, "step": 1300 }, { "epoch": 162.9, "eval_loss": 2.5465590953826904, "eval_runtime": 7.6278, "eval_samples_per_second": 8.259, "eval_steps_per_second": 1.049, "eval_wer": 0.7641424723136785, "step": 1304 }, { "epoch": 163.68, "learning_rate": 7.443037974683545e-05, "loss": 0.8998, "step": 1310 }, { "epoch": 163.9, "eval_loss": 2.5044281482696533, "eval_runtime": 7.5244, "eval_samples_per_second": 8.373, "eval_steps_per_second": 1.063, "eval_wer": 0.7629452259802454, "step": 1312 }, { "epoch": 164.9, "learning_rate": 7.189873417721519e-05, "loss": 0.9195, "step": 1320 }, { "epoch": 164.9, "eval_loss": 2.4214060306549072, "eval_runtime": 7.5004, "eval_samples_per_second": 8.4, "eval_steps_per_second": 1.067, "eval_wer": 0.7632445375636037, "step": 1320 }, { "epoch": 165.9, "eval_loss": 2.3591132164001465, "eval_runtime": 7.4748, "eval_samples_per_second": 8.428, "eval_steps_per_second": 1.07, "eval_wer": 0.7608500448967375, "step": 1328 }, { "epoch": 166.23, "learning_rate": 6.936708860759494e-05, "loss": 0.9795, "step": 1330 }, { "epoch": 166.9, "eval_loss": 2.4736063480377197, "eval_runtime": 7.4961, "eval_samples_per_second": 8.404, "eval_steps_per_second": 1.067, "eval_wer": 0.7542651900628554, "step": 1336 }, { "epoch": 167.45, "learning_rate": 6.68354430379747e-05, "loss": 0.9041, "step": 1340 }, { "epoch": 167.9, "eval_loss": 2.4042837619781494, "eval_runtime": 7.5141, "eval_samples_per_second": 8.384, "eval_steps_per_second": 1.065, "eval_wer": 0.7596527985633044, "step": 1344 }, { "epoch": 168.68, "learning_rate": 6.430379746835444e-05, "loss": 0.9111, "step": 1350 }, { "epoch": 168.9, "eval_loss": 2.4593567848205566, "eval_runtime": 7.5475, "eval_samples_per_second": 8.347, "eval_steps_per_second": 1.06, "eval_wer": 0.7416941035618079, "step": 1352 }, { "epoch": 169.9, "learning_rate": 6.177215189873418e-05, "loss": 0.8902, "step": 1360 }, { "epoch": 169.9, "eval_loss": 2.425161361694336, "eval_runtime": 7.4767, "eval_samples_per_second": 8.426, "eval_steps_per_second": 1.07, "eval_wer": 0.7491768931457647, "step": 1360 }, { "epoch": 170.9, "eval_loss": 2.4007468223571777, "eval_runtime": 7.5101, "eval_samples_per_second": 8.389, "eval_steps_per_second": 1.065, "eval_wer": 0.7542651900628554, "step": 1368 }, { "epoch": 171.23, "learning_rate": 5.924050632911393e-05, "loss": 0.9956, "step": 1370 }, { "epoch": 171.9, "eval_loss": 2.3502612113952637, "eval_runtime": 7.4634, "eval_samples_per_second": 8.441, "eval_steps_per_second": 1.072, "eval_wer": 0.7605507333133792, "step": 1376 }, { "epoch": 172.45, "learning_rate": 5.670886075949368e-05, "loss": 0.8645, "step": 1380 }, { "epoch": 172.9, "eval_loss": 2.3732781410217285, "eval_runtime": 7.525, "eval_samples_per_second": 8.372, "eval_steps_per_second": 1.063, "eval_wer": 0.7470817120622568, "step": 1384 }, { "epoch": 173.68, "learning_rate": 5.417721518987342e-05, "loss": 0.8989, "step": 1390 }, { "epoch": 173.9, "eval_loss": 2.3426337242126465, "eval_runtime": 7.5601, "eval_samples_per_second": 8.333, "eval_steps_per_second": 1.058, "eval_wer": 0.7545645016462137, "step": 1392 }, { "epoch": 174.9, "learning_rate": 5.1645569620253165e-05, "loss": 0.8961, "step": 1400 }, { "epoch": 174.9, "eval_loss": 2.4074254035949707, "eval_runtime": 7.5242, "eval_samples_per_second": 8.373, "eval_steps_per_second": 1.063, "eval_wer": 0.7521700089793475, "step": 1400 }, { "epoch": 175.9, "eval_loss": 2.4200074672698975, "eval_runtime": 7.512, "eval_samples_per_second": 8.387, "eval_steps_per_second": 1.065, "eval_wer": 0.7545645016462137, "step": 1408 }, { "epoch": 176.23, "learning_rate": 4.9113924050632915e-05, "loss": 0.9481, "step": 1410 }, { "epoch": 176.9, "eval_loss": 2.361619234085083, "eval_runtime": 7.5221, "eval_samples_per_second": 8.375, "eval_steps_per_second": 1.064, "eval_wer": 0.7503741394791978, "step": 1416 }, { "epoch": 177.45, "learning_rate": 4.658227848101266e-05, "loss": 0.9347, "step": 1420 }, { "epoch": 177.9, "eval_loss": 2.3545103073120117, "eval_runtime": 7.5422, "eval_samples_per_second": 8.353, "eval_steps_per_second": 1.061, "eval_wer": 0.7539658784794971, "step": 1424 }, { "epoch": 178.68, "learning_rate": 4.405063291139241e-05, "loss": 0.9105, "step": 1430 }, { "epoch": 178.9, "eval_loss": 2.3841302394866943, "eval_runtime": 7.516, "eval_samples_per_second": 8.382, "eval_steps_per_second": 1.064, "eval_wer": 0.7539658784794971, "step": 1432 }, { "epoch": 179.9, "learning_rate": 4.1518987341772154e-05, "loss": 0.8936, "step": 1440 }, { "epoch": 179.9, "eval_loss": 2.4328415393829346, "eval_runtime": 7.5588, "eval_samples_per_second": 8.335, "eval_steps_per_second": 1.058, "eval_wer": 0.7530679437294223, "step": 1440 }, { "epoch": 180.9, "eval_loss": 2.4595539569854736, "eval_runtime": 7.5083, "eval_samples_per_second": 8.391, "eval_steps_per_second": 1.065, "eval_wer": 0.7473810236456151, "step": 1448 }, { "epoch": 181.23, "learning_rate": 3.8987341772151905e-05, "loss": 0.9511, "step": 1450 }, { "epoch": 181.9, "eval_loss": 2.4178130626678467, "eval_runtime": 7.5338, "eval_samples_per_second": 8.362, "eval_steps_per_second": 1.062, "eval_wer": 0.7509727626459144, "step": 1456 }, { "epoch": 182.45, "learning_rate": 3.645569620253165e-05, "loss": 0.8743, "step": 1460 }, { "epoch": 182.9, "eval_loss": 2.4075419902801514, "eval_runtime": 7.4937, "eval_samples_per_second": 8.407, "eval_steps_per_second": 1.068, "eval_wer": 0.7512720742292727, "step": 1464 }, { "epoch": 183.68, "learning_rate": 3.392405063291139e-05, "loss": 0.8905, "step": 1470 }, { "epoch": 183.9, "eval_loss": 2.3900365829467773, "eval_runtime": 7.5106, "eval_samples_per_second": 8.388, "eval_steps_per_second": 1.065, "eval_wer": 0.7524693205627058, "step": 1472 }, { "epoch": 184.9, "learning_rate": 3.1392405063291144e-05, "loss": 0.8968, "step": 1480 }, { "epoch": 184.9, "eval_loss": 2.4383199214935303, "eval_runtime": 7.5418, "eval_samples_per_second": 8.353, "eval_steps_per_second": 1.061, "eval_wer": 0.7542651900628554, "step": 1480 }, { "epoch": 185.9, "eval_loss": 2.4400503635406494, "eval_runtime": 7.5025, "eval_samples_per_second": 8.397, "eval_steps_per_second": 1.066, "eval_wer": 0.7518706973959892, "step": 1488 }, { "epoch": 186.23, "learning_rate": 2.8860759493670884e-05, "loss": 0.9459, "step": 1490 }, { "epoch": 186.9, "eval_loss": 2.4344472885131836, "eval_runtime": 7.4831, "eval_samples_per_second": 8.419, "eval_steps_per_second": 1.069, "eval_wer": 0.7497755163124813, "step": 1496 }, { "epoch": 187.45, "learning_rate": 2.6329113924050635e-05, "loss": 0.9273, "step": 1500 }, { "epoch": 187.9, "eval_loss": 2.417949914932251, "eval_runtime": 7.5261, "eval_samples_per_second": 8.371, "eval_steps_per_second": 1.063, "eval_wer": 0.7449865309787489, "step": 1504 }, { "epoch": 188.68, "learning_rate": 2.379746835443038e-05, "loss": 0.9523, "step": 1510 }, { "epoch": 188.9, "eval_loss": 2.4077460765838623, "eval_runtime": 7.5161, "eval_samples_per_second": 8.382, "eval_steps_per_second": 1.064, "eval_wer": 0.7524693205627058, "step": 1512 }, { "epoch": 189.9, "learning_rate": 2.1265822784810126e-05, "loss": 0.8903, "step": 1520 }, { "epoch": 189.9, "eval_loss": 2.4481801986694336, "eval_runtime": 7.5019, "eval_samples_per_second": 8.398, "eval_steps_per_second": 1.066, "eval_wer": 0.7521700089793475, "step": 1520 }, { "epoch": 190.9, "eval_loss": 2.4506967067718506, "eval_runtime": 7.5305, "eval_samples_per_second": 8.366, "eval_steps_per_second": 1.062, "eval_wer": 0.7572583058964382, "step": 1528 }, { "epoch": 191.23, "learning_rate": 1.8734177215189874e-05, "loss": 0.9759, "step": 1530 }, { "epoch": 191.9, "eval_loss": 2.4391028881073, "eval_runtime": 7.5878, "eval_samples_per_second": 8.303, "eval_steps_per_second": 1.054, "eval_wer": 0.7563603711463633, "step": 1536 }, { "epoch": 192.45, "learning_rate": 1.620253164556962e-05, "loss": 0.887, "step": 1540 }, { "epoch": 192.9, "eval_loss": 2.4516143798828125, "eval_runtime": 7.5568, "eval_samples_per_second": 8.337, "eval_steps_per_second": 1.059, "eval_wer": 0.7521700089793475, "step": 1544 }, { "epoch": 193.68, "learning_rate": 1.3670886075949368e-05, "loss": 0.8796, "step": 1550 }, { "epoch": 193.9, "eval_loss": 2.440375328063965, "eval_runtime": 7.5815, "eval_samples_per_second": 8.31, "eval_steps_per_second": 1.055, "eval_wer": 0.7542651900628554, "step": 1552 }, { "epoch": 194.9, "learning_rate": 1.1139240506329114e-05, "loss": 0.861, "step": 1560 }, { "epoch": 194.9, "eval_loss": 2.4268441200256348, "eval_runtime": 7.5115, "eval_samples_per_second": 8.387, "eval_steps_per_second": 1.065, "eval_wer": 0.7518706973959892, "step": 1560 }, { "epoch": 195.9, "eval_loss": 2.4319472312927246, "eval_runtime": 7.4835, "eval_samples_per_second": 8.418, "eval_steps_per_second": 1.069, "eval_wer": 0.7506734510625561, "step": 1568 }, { "epoch": 196.23, "learning_rate": 8.607594936708861e-06, "loss": 0.9349, "step": 1570 }, { "epoch": 196.9, "eval_loss": 2.422971487045288, "eval_runtime": 7.5236, "eval_samples_per_second": 8.374, "eval_steps_per_second": 1.063, "eval_wer": 0.7539658784794971, "step": 1576 }, { "epoch": 197.45, "learning_rate": 6.075949367088608e-06, "loss": 0.9154, "step": 1580 }, { "epoch": 197.9, "eval_loss": 2.4296085834503174, "eval_runtime": 7.5409, "eval_samples_per_second": 8.354, "eval_steps_per_second": 1.061, "eval_wer": 0.7557617479796468, "step": 1584 }, { "epoch": 198.68, "learning_rate": 3.544303797468355e-06, "loss": 0.8695, "step": 1590 }, { "epoch": 198.9, "eval_loss": 2.430819511413574, "eval_runtime": 7.5825, "eval_samples_per_second": 8.309, "eval_steps_per_second": 1.055, "eval_wer": 0.7557617479796468, "step": 1592 }, { "epoch": 199.9, "learning_rate": 1.0126582278481013e-06, "loss": 0.8754, "step": 1600 }, { "epoch": 199.9, "eval_loss": 2.428027868270874, "eval_runtime": 7.4856, "eval_samples_per_second": 8.416, "eval_steps_per_second": 1.069, "eval_wer": 0.7545645016462137, "step": 1600 }, { "epoch": 199.9, "step": 1600, "total_flos": 1.1848602213269387e+19, "train_loss": 2.0372400057315825, "train_runtime": 18220.1543, "train_samples_per_second": 6.18, "train_steps_per_second": 0.088 } ], "max_steps": 1600, "num_train_epochs": 200, "total_flos": 1.1848602213269387e+19, "trial_name": null, "trial_params": null }