{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 18300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 47.84418869018555, "eval_runtime": 186.8063, "eval_samples_per_second": 19.587, "eval_steps_per_second": 1.226, "eval_wer": 1.0, "step": 183 }, { "epoch": 2.0, "eval_loss": 6.310945510864258, "eval_runtime": 163.2985, "eval_samples_per_second": 22.407, "eval_steps_per_second": 1.402, "eval_wer": 1.0, "step": 366 }, { "epoch": 2.73, "learning_rate": 0.00029699999999999996, "loss": 41.8902, "step": 500 }, { "epoch": 3.0, "eval_loss": 6.239192008972168, "eval_runtime": 163.0201, "eval_samples_per_second": 22.445, "eval_steps_per_second": 1.405, "eval_wer": 1.0, "step": 549 }, { "epoch": 4.0, "eval_loss": 5.973925590515137, "eval_runtime": 163.0214, "eval_samples_per_second": 22.445, "eval_steps_per_second": 1.405, "eval_wer": 1.1123227917121048, "step": 732 }, { "epoch": 5.0, "eval_loss": 4.901411056518555, "eval_runtime": 162.8715, "eval_samples_per_second": 22.466, "eval_steps_per_second": 1.406, "eval_wer": 1.9473827699018538, "step": 915 }, { "epoch": 5.46, "learning_rate": 0.00018834586466165413, "loss": 5.5817, "step": 1000 }, { "epoch": 6.0, "eval_loss": 3.9892334938049316, "eval_runtime": 163.2053, "eval_samples_per_second": 22.42, "eval_steps_per_second": 1.403, "eval_wer": 1.0188113413304254, "step": 1098 }, { "epoch": 7.0, "eval_loss": 3.5080456733703613, "eval_runtime": 162.7362, "eval_samples_per_second": 22.484, "eval_steps_per_second": 1.407, "eval_wer": 1.0103598691384952, "step": 1281 }, { "epoch": 8.0, "eval_loss": 3.0796852111816406, "eval_runtime": 164.2666, "eval_samples_per_second": 22.275, "eval_steps_per_second": 1.394, "eval_wer": 0.9904580152671756, "step": 1464 }, { "epoch": 8.2, "learning_rate": 7.556390977443609e-05, "loss": 3.5579, "step": 1500 }, { "epoch": 9.0, "eval_loss": 2.8110806941986084, "eval_runtime": 168.541, "eval_samples_per_second": 21.71, "eval_steps_per_second": 1.359, "eval_wer": 0.9836423118865867, "step": 1647 }, { "epoch": 10.0, "eval_loss": 2.67260479927063, "eval_runtime": 165.7448, "eval_samples_per_second": 22.076, "eval_steps_per_second": 1.382, "eval_wer": 0.9814612868047983, "step": 1830 }, { "epoch": 10.93, "learning_rate": 0.00027480337078651684, "loss": 2.7771, "step": 2000 }, { "epoch": 11.0, "eval_loss": 2.7177391052246094, "eval_runtime": 191.8821, "eval_samples_per_second": 19.069, "eval_steps_per_second": 1.193, "eval_wer": 0.9809160305343512, "step": 2013 }, { "epoch": 12.0, "eval_loss": 2.3581743240356445, "eval_runtime": 168.9095, "eval_samples_per_second": 21.662, "eval_steps_per_second": 1.356, "eval_wer": 0.9691930207197382, "step": 2196 }, { "epoch": 13.0, "eval_loss": 2.1708498001098633, "eval_runtime": 167.2979, "eval_samples_per_second": 21.871, "eval_steps_per_second": 1.369, "eval_wer": 0.9757360959651036, "step": 2379 }, { "epoch": 13.66, "learning_rate": 0.000266376404494382, "loss": 2.3488, "step": 2500 }, { "epoch": 14.0, "eval_loss": 2.049051284790039, "eval_runtime": 169.536, "eval_samples_per_second": 21.582, "eval_steps_per_second": 1.351, "eval_wer": 0.9525627044711014, "step": 2562 }, { "epoch": 15.0, "eval_loss": 1.8517801761627197, "eval_runtime": 166.9821, "eval_samples_per_second": 21.913, "eval_steps_per_second": 1.371, "eval_wer": 0.9378407851690295, "step": 2745 }, { "epoch": 16.0, "eval_loss": 1.6844531297683716, "eval_runtime": 166.3633, "eval_samples_per_second": 21.994, "eval_steps_per_second": 1.377, "eval_wer": 0.9285714285714286, "step": 2928 }, { "epoch": 16.39, "learning_rate": 0.00025794943820224716, "loss": 1.7859, "step": 3000 }, { "epoch": 17.0, "eval_loss": 1.641157627105713, "eval_runtime": 165.2481, "eval_samples_per_second": 22.142, "eval_steps_per_second": 1.386, "eval_wer": 0.9280261723009815, "step": 3111 }, { "epoch": 18.0, "eval_loss": 1.5487942695617676, "eval_runtime": 166.3028, "eval_samples_per_second": 22.002, "eval_steps_per_second": 1.377, "eval_wer": 0.9034896401308615, "step": 3294 }, { "epoch": 19.0, "eval_loss": 1.4545683860778809, "eval_runtime": 169.7236, "eval_samples_per_second": 21.559, "eval_steps_per_second": 1.349, "eval_wer": 0.9010359869138496, "step": 3477 }, { "epoch": 19.13, "learning_rate": 0.00024952247191011235, "loss": 1.3898, "step": 3500 }, { "epoch": 20.0, "eval_loss": 1.5146596431732178, "eval_runtime": 170.5824, "eval_samples_per_second": 21.45, "eval_steps_per_second": 1.342, "eval_wer": 0.9201199563794984, "step": 3660 }, { "epoch": 21.0, "eval_loss": 1.446706771850586, "eval_runtime": 164.7146, "eval_samples_per_second": 22.214, "eval_steps_per_second": 1.39, "eval_wer": 0.895856052344602, "step": 3843 }, { "epoch": 21.86, "learning_rate": 0.00024111235955056177, "loss": 1.1291, "step": 4000 }, { "epoch": 22.0, "eval_loss": 1.474255084991455, "eval_runtime": 166.7828, "eval_samples_per_second": 21.939, "eval_steps_per_second": 1.373, "eval_wer": 0.9034896401308615, "step": 4026 }, { "epoch": 23.0, "eval_loss": 1.3826948404312134, "eval_runtime": 165.2382, "eval_samples_per_second": 22.144, "eval_steps_per_second": 1.386, "eval_wer": 0.876226826608506, "step": 4209 }, { "epoch": 24.0, "eval_loss": 1.3436840772628784, "eval_runtime": 167.9623, "eval_samples_per_second": 21.785, "eval_steps_per_second": 1.363, "eval_wer": 0.8792257360959651, "step": 4392 }, { "epoch": 24.59, "learning_rate": 0.00023268539325842696, "loss": 0.8993, "step": 4500 }, { "epoch": 25.0, "eval_loss": 1.2894562482833862, "eval_runtime": 179.0778, "eval_samples_per_second": 20.432, "eval_steps_per_second": 1.279, "eval_wer": 0.8576881134133042, "step": 4575 }, { "epoch": 26.0, "eval_loss": 1.2927522659301758, "eval_runtime": 173.2382, "eval_samples_per_second": 21.121, "eval_steps_per_second": 1.322, "eval_wer": 0.8557797164667393, "step": 4758 }, { "epoch": 27.0, "eval_loss": 1.294653058052063, "eval_runtime": 166.3679, "eval_samples_per_second": 21.993, "eval_steps_per_second": 1.376, "eval_wer": 0.9163031624863686, "step": 4941 }, { "epoch": 27.32, "learning_rate": 0.0002242584269662921, "loss": 0.6298, "step": 5000 }, { "epoch": 28.0, "eval_loss": 1.3150520324707031, "eval_runtime": 164.8819, "eval_samples_per_second": 22.192, "eval_steps_per_second": 1.389, "eval_wer": 0.873773173391494, "step": 5124 }, { "epoch": 29.0, "eval_loss": 1.2971961498260498, "eval_runtime": 163.7658, "eval_samples_per_second": 22.343, "eval_steps_per_second": 1.398, "eval_wer": 0.8514176663031625, "step": 5307 }, { "epoch": 30.0, "eval_loss": 1.302983283996582, "eval_runtime": 162.8739, "eval_samples_per_second": 22.465, "eval_steps_per_second": 1.406, "eval_wer": 0.8432388222464559, "step": 5490 }, { "epoch": 30.05, "learning_rate": 0.00021583146067415728, "loss": 0.4757, "step": 5500 }, { "epoch": 31.0, "eval_loss": 1.3263603448867798, "eval_runtime": 163.1422, "eval_samples_per_second": 22.428, "eval_steps_per_second": 1.404, "eval_wer": 0.836423118865867, "step": 5673 }, { "epoch": 32.0, "eval_loss": 1.3130974769592285, "eval_runtime": 162.7813, "eval_samples_per_second": 22.478, "eval_steps_per_second": 1.407, "eval_wer": 0.8421483097055616, "step": 5856 }, { "epoch": 32.79, "learning_rate": 0.00020740449438202247, "loss": 0.3735, "step": 6000 }, { "epoch": 33.0, "eval_loss": 1.3457393646240234, "eval_runtime": 164.771, "eval_samples_per_second": 22.207, "eval_steps_per_second": 1.39, "eval_wer": 0.8587786259541985, "step": 6039 }, { "epoch": 34.0, "eval_loss": 1.3450396060943604, "eval_runtime": 163.6185, "eval_samples_per_second": 22.363, "eval_steps_per_second": 1.4, "eval_wer": 0.8473282442748091, "step": 6222 }, { "epoch": 35.0, "eval_loss": 1.3451658487319946, "eval_runtime": 167.7067, "eval_samples_per_second": 21.818, "eval_steps_per_second": 1.365, "eval_wer": 0.9217557251908397, "step": 6405 }, { "epoch": 35.52, "learning_rate": 0.0001989775280898876, "loss": 0.3253, "step": 6500 }, { "epoch": 36.0, "eval_loss": 1.375409722328186, "eval_runtime": 164.3009, "eval_samples_per_second": 22.27, "eval_steps_per_second": 1.394, "eval_wer": 0.8396946564885496, "step": 6588 }, { "epoch": 37.0, "eval_loss": 1.3554260730743408, "eval_runtime": 163.3464, "eval_samples_per_second": 22.4, "eval_steps_per_second": 1.402, "eval_wer": 0.8353326063249727, "step": 6771 }, { "epoch": 38.0, "eval_loss": 1.353210687637329, "eval_runtime": 166.4062, "eval_samples_per_second": 21.988, "eval_steps_per_second": 1.376, "eval_wer": 0.8312431842966194, "step": 6954 }, { "epoch": 38.25, "learning_rate": 0.0001905505617977528, "loss": 0.2816, "step": 7000 }, { "epoch": 39.0, "eval_loss": 1.369396686553955, "eval_runtime": 165.9264, "eval_samples_per_second": 22.052, "eval_steps_per_second": 1.38, "eval_wer": 0.8345147219193021, "step": 7137 }, { "epoch": 40.0, "eval_loss": 1.395269751548767, "eval_runtime": 163.891, "eval_samples_per_second": 22.326, "eval_steps_per_second": 1.397, "eval_wer": 0.829607415485278, "step": 7320 }, { "epoch": 40.98, "learning_rate": 0.00018212359550561795, "loss": 0.2397, "step": 7500 }, { "epoch": 41.0, "eval_loss": 1.385826826095581, "eval_runtime": 165.4232, "eval_samples_per_second": 22.119, "eval_steps_per_second": 1.384, "eval_wer": 0.8293347873500545, "step": 7503 }, { "epoch": 42.0, "eval_loss": 1.3958967924118042, "eval_runtime": 163.1457, "eval_samples_per_second": 22.428, "eval_steps_per_second": 1.404, "eval_wer": 0.8402399127589967, "step": 7686 }, { "epoch": 43.0, "eval_loss": 1.435033917427063, "eval_runtime": 163.4766, "eval_samples_per_second": 22.382, "eval_steps_per_second": 1.401, "eval_wer": 0.9318429661941112, "step": 7869 }, { "epoch": 43.72, "learning_rate": 0.0001737134831460674, "loss": 0.2084, "step": 8000 }, { "epoch": 44.0, "eval_loss": 1.4003560543060303, "eval_runtime": 164.4371, "eval_samples_per_second": 22.252, "eval_steps_per_second": 1.393, "eval_wer": 0.8805888767720829, "step": 8052 }, { "epoch": 45.0, "eval_loss": 1.387134075164795, "eval_runtime": 164.8129, "eval_samples_per_second": 22.201, "eval_steps_per_second": 1.389, "eval_wer": 0.8255179934569248, "step": 8235 }, { "epoch": 46.0, "eval_loss": 1.4059827327728271, "eval_runtime": 166.2017, "eval_samples_per_second": 22.015, "eval_steps_per_second": 1.378, "eval_wer": 0.8252453653217012, "step": 8418 }, { "epoch": 46.45, "learning_rate": 0.00016528651685393257, "loss": 0.1853, "step": 8500 }, { "epoch": 47.0, "eval_loss": 1.3992133140563965, "eval_runtime": 164.2693, "eval_samples_per_second": 22.274, "eval_steps_per_second": 1.394, "eval_wer": 0.8500545256270448, "step": 8601 }, { "epoch": 48.0, "eval_loss": 1.4186208248138428, "eval_runtime": 162.8365, "eval_samples_per_second": 22.47, "eval_steps_per_second": 1.406, "eval_wer": 0.8252453653217012, "step": 8784 }, { "epoch": 49.0, "eval_loss": 1.4120242595672607, "eval_runtime": 164.9525, "eval_samples_per_second": 22.182, "eval_steps_per_second": 1.388, "eval_wer": 0.8165212649945475, "step": 8967 }, { "epoch": 49.18, "learning_rate": 0.00015685955056179775, "loss": 0.1671, "step": 9000 }, { "epoch": 50.0, "eval_loss": 1.4165652990341187, "eval_runtime": 173.8761, "eval_samples_per_second": 21.044, "eval_steps_per_second": 1.317, "eval_wer": 0.8214285714285714, "step": 9150 }, { "epoch": 51.0, "eval_loss": 1.4411484003067017, "eval_runtime": 165.9975, "eval_samples_per_second": 22.043, "eval_steps_per_second": 1.38, "eval_wer": 0.8500545256270448, "step": 9333 }, { "epoch": 51.91, "learning_rate": 0.00014843258426966292, "loss": 0.1513, "step": 9500 }, { "epoch": 52.0, "eval_loss": 1.4692339897155762, "eval_runtime": 163.251, "eval_samples_per_second": 22.413, "eval_steps_per_second": 1.403, "eval_wer": 0.839422028353326, "step": 9516 }, { "epoch": 53.0, "eval_loss": 1.4640177488327026, "eval_runtime": 162.0066, "eval_samples_per_second": 22.586, "eval_steps_per_second": 1.414, "eval_wer": 0.8391494002181025, "step": 9699 }, { "epoch": 54.0, "eval_loss": 1.450060486793518, "eval_runtime": 165.9207, "eval_samples_per_second": 22.053, "eval_steps_per_second": 1.38, "eval_wer": 0.8418756815703381, "step": 9882 }, { "epoch": 54.64, "learning_rate": 0.00014002247191011234, "loss": 0.133, "step": 10000 }, { "epoch": 55.0, "eval_loss": 1.4133520126342773, "eval_runtime": 163.1361, "eval_samples_per_second": 22.429, "eval_steps_per_second": 1.404, "eval_wer": 0.8350599781897492, "step": 10065 }, { "epoch": 56.0, "eval_loss": 1.4592992067337036, "eval_runtime": 161.8579, "eval_samples_per_second": 22.606, "eval_steps_per_second": 1.415, "eval_wer": 0.8405125408942202, "step": 10248 }, { "epoch": 57.0, "eval_loss": 1.45597505569458, "eval_runtime": 161.821, "eval_samples_per_second": 22.611, "eval_steps_per_second": 1.415, "eval_wer": 0.8388767720828789, "step": 10431 }, { "epoch": 57.38, "learning_rate": 0.00013159550561797753, "loss": 0.1198, "step": 10500 }, { "epoch": 58.0, "eval_loss": 1.4733554124832153, "eval_runtime": 162.1949, "eval_samples_per_second": 22.559, "eval_steps_per_second": 1.412, "eval_wer": 0.8334242093784079, "step": 10614 }, { "epoch": 59.0, "eval_loss": 1.4649208784103394, "eval_runtime": 162.1709, "eval_samples_per_second": 22.563, "eval_steps_per_second": 1.412, "eval_wer": 0.8317884405670665, "step": 10797 }, { "epoch": 60.0, "eval_loss": 1.4659123420715332, "eval_runtime": 163.9506, "eval_samples_per_second": 22.318, "eval_steps_per_second": 1.397, "eval_wer": 0.8099781897491821, "step": 10980 }, { "epoch": 60.11, "learning_rate": 0.0001231685393258427, "loss": 0.1109, "step": 11000 }, { "epoch": 61.0, "eval_loss": 1.4783909320831299, "eval_runtime": 165.1461, "eval_samples_per_second": 22.156, "eval_steps_per_second": 1.387, "eval_wer": 0.811886586695747, "step": 11163 }, { "epoch": 62.0, "eval_loss": 1.493830680847168, "eval_runtime": 164.5944, "eval_samples_per_second": 22.23, "eval_steps_per_second": 1.391, "eval_wer": 0.8148854961832062, "step": 11346 }, { "epoch": 62.84, "learning_rate": 0.00011474157303370785, "loss": 0.1063, "step": 11500 }, { "epoch": 63.0, "eval_loss": 1.5050164461135864, "eval_runtime": 163.3536, "eval_samples_per_second": 22.399, "eval_steps_per_second": 1.402, "eval_wer": 0.8151581243184297, "step": 11529 }, { "epoch": 64.0, "eval_loss": 1.4773460626602173, "eval_runtime": 168.4977, "eval_samples_per_second": 21.715, "eval_steps_per_second": 1.359, "eval_wer": 0.8176117775354417, "step": 11712 }, { "epoch": 65.0, "eval_loss": 1.4835622310638428, "eval_runtime": 166.8711, "eval_samples_per_second": 21.927, "eval_steps_per_second": 1.372, "eval_wer": 0.8260632497273719, "step": 11895 }, { "epoch": 65.57, "learning_rate": 0.00010631460674157301, "loss": 0.0966, "step": 12000 }, { "epoch": 66.0, "eval_loss": 1.4978560209274292, "eval_runtime": 162.3032, "eval_samples_per_second": 22.544, "eval_steps_per_second": 1.411, "eval_wer": 0.8157033805888768, "step": 12078 }, { "epoch": 67.0, "eval_loss": 1.4603493213653564, "eval_runtime": 162.8062, "eval_samples_per_second": 22.475, "eval_steps_per_second": 1.407, "eval_wer": 0.8047982551799345, "step": 12261 }, { "epoch": 68.0, "eval_loss": 1.4802541732788086, "eval_runtime": 169.4775, "eval_samples_per_second": 21.59, "eval_steps_per_second": 1.351, "eval_wer": 0.8127044711014176, "step": 12444 }, { "epoch": 68.31, "learning_rate": 9.790449438202247e-05, "loss": 0.0867, "step": 12500 }, { "epoch": 69.0, "eval_loss": 1.4973595142364502, "eval_runtime": 164.1372, "eval_samples_per_second": 22.292, "eval_steps_per_second": 1.395, "eval_wer": 0.8129770992366412, "step": 12627 }, { "epoch": 70.0, "eval_loss": 1.4721262454986572, "eval_runtime": 163.536, "eval_samples_per_second": 22.374, "eval_steps_per_second": 1.4, "eval_wer": 0.8077971646673937, "step": 12810 }, { "epoch": 71.0, "eval_loss": 1.4643745422363281, "eval_runtime": 166.8819, "eval_samples_per_second": 21.926, "eval_steps_per_second": 1.372, "eval_wer": 0.819247546346783, "step": 12993 }, { "epoch": 71.04, "learning_rate": 8.947752808988763e-05, "loss": 0.0827, "step": 13000 }, { "epoch": 72.0, "eval_loss": 1.4834694862365723, "eval_runtime": 167.6508, "eval_samples_per_second": 21.825, "eval_steps_per_second": 1.366, "eval_wer": 0.8137949836423118, "step": 13176 }, { "epoch": 73.0, "eval_loss": 1.4933878183364868, "eval_runtime": 170.3063, "eval_samples_per_second": 21.485, "eval_steps_per_second": 1.345, "eval_wer": 0.8121592148309705, "step": 13359 }, { "epoch": 73.77, "learning_rate": 8.10505617977528e-05, "loss": 0.0734, "step": 13500 }, { "epoch": 74.0, "eval_loss": 1.4950696229934692, "eval_runtime": 175.7645, "eval_samples_per_second": 20.818, "eval_steps_per_second": 1.303, "eval_wer": 0.8061613958560524, "step": 13542 }, { "epoch": 75.0, "eval_loss": 1.490771770477295, "eval_runtime": 168.0913, "eval_samples_per_second": 21.768, "eval_steps_per_second": 1.362, "eval_wer": 0.806979280261723, "step": 13725 }, { "epoch": 76.0, "eval_loss": 1.4876092672348022, "eval_runtime": 166.128, "eval_samples_per_second": 22.025, "eval_steps_per_second": 1.378, "eval_wer": 0.8124318429661941, "step": 13908 }, { "epoch": 76.5, "learning_rate": 7.262359550561797e-05, "loss": 0.0664, "step": 14000 }, { "epoch": 77.0, "eval_loss": 1.493386149406433, "eval_runtime": 166.8817, "eval_samples_per_second": 21.926, "eval_steps_per_second": 1.372, "eval_wer": 0.8053435114503816, "step": 14091 }, { "epoch": 78.0, "eval_loss": 1.4603490829467773, "eval_runtime": 169.3203, "eval_samples_per_second": 21.61, "eval_steps_per_second": 1.352, "eval_wer": 0.8047982551799345, "step": 14274 }, { "epoch": 79.0, "eval_loss": 1.4732308387756348, "eval_runtime": 165.2553, "eval_samples_per_second": 22.142, "eval_steps_per_second": 1.386, "eval_wer": 0.8072519083969466, "step": 14457 }, { "epoch": 79.23, "learning_rate": 6.42134831460674e-05, "loss": 0.0602, "step": 14500 }, { "epoch": 80.0, "eval_loss": 1.4924767017364502, "eval_runtime": 166.2646, "eval_samples_per_second": 22.007, "eval_steps_per_second": 1.377, "eval_wer": 0.8077971646673937, "step": 14640 }, { "epoch": 81.0, "eval_loss": 1.4812291860580444, "eval_runtime": 166.8878, "eval_samples_per_second": 21.925, "eval_steps_per_second": 1.372, "eval_wer": 0.806434023991276, "step": 14823 }, { "epoch": 81.97, "learning_rate": 5.578651685393258e-05, "loss": 0.057, "step": 15000 }, { "epoch": 82.0, "eval_loss": 1.4949839115142822, "eval_runtime": 166.616, "eval_samples_per_second": 21.961, "eval_steps_per_second": 1.374, "eval_wer": 0.8012540894220284, "step": 15006 }, { "epoch": 83.0, "eval_loss": 1.4784878492355347, "eval_runtime": 172.4918, "eval_samples_per_second": 21.213, "eval_steps_per_second": 1.328, "eval_wer": 0.8056161395856052, "step": 15189 }, { "epoch": 84.0, "eval_loss": 1.485625982284546, "eval_runtime": 165.5579, "eval_samples_per_second": 22.101, "eval_steps_per_second": 1.383, "eval_wer": 0.7993456924754635, "step": 15372 }, { "epoch": 84.7, "learning_rate": 4.735955056179775e-05, "loss": 0.0517, "step": 15500 }, { "epoch": 85.0, "eval_loss": 1.4754849672317505, "eval_runtime": 168.7909, "eval_samples_per_second": 21.678, "eval_steps_per_second": 1.357, "eval_wer": 0.8034351145038168, "step": 15555 }, { "epoch": 86.0, "eval_loss": 1.4813350439071655, "eval_runtime": 168.272, "eval_samples_per_second": 21.745, "eval_steps_per_second": 1.361, "eval_wer": 0.8034351145038168, "step": 15738 }, { "epoch": 87.0, "eval_loss": 1.4965763092041016, "eval_runtime": 166.9884, "eval_samples_per_second": 21.912, "eval_steps_per_second": 1.371, "eval_wer": 0.8047982551799345, "step": 15921 }, { "epoch": 87.43, "learning_rate": 3.893258426966292e-05, "loss": 0.0468, "step": 16000 }, { "epoch": 88.0, "eval_loss": 1.4883418083190918, "eval_runtime": 166.1387, "eval_samples_per_second": 22.024, "eval_steps_per_second": 1.378, "eval_wer": 0.8001635768811342, "step": 16104 }, { "epoch": 89.0, "eval_loss": 1.4746148586273193, "eval_runtime": 165.9654, "eval_samples_per_second": 22.047, "eval_steps_per_second": 1.38, "eval_wer": 0.8023446019629226, "step": 16287 }, { "epoch": 90.0, "eval_loss": 1.4697260856628418, "eval_runtime": 166.9567, "eval_samples_per_second": 21.916, "eval_steps_per_second": 1.372, "eval_wer": 0.7974372955288986, "step": 16470 }, { "epoch": 90.16, "learning_rate": 3.0505617977528088e-05, "loss": 0.0426, "step": 16500 }, { "epoch": 91.0, "eval_loss": 1.4775140285491943, "eval_runtime": 165.779, "eval_samples_per_second": 22.072, "eval_steps_per_second": 1.381, "eval_wer": 0.8004362050163577, "step": 16653 }, { "epoch": 92.0, "eval_loss": 1.4852207899093628, "eval_runtime": 173.415, "eval_samples_per_second": 21.1, "eval_steps_per_second": 1.321, "eval_wer": 0.8023446019629226, "step": 16836 }, { "epoch": 92.9, "learning_rate": 2.2078651685393255e-05, "loss": 0.0387, "step": 17000 }, { "epoch": 93.0, "eval_loss": 1.4868098497390747, "eval_runtime": 166.0768, "eval_samples_per_second": 22.032, "eval_steps_per_second": 1.379, "eval_wer": 0.8004362050163577, "step": 17019 }, { "epoch": 94.0, "eval_loss": 1.47845458984375, "eval_runtime": 171.2193, "eval_samples_per_second": 21.37, "eval_steps_per_second": 1.337, "eval_wer": 0.802071973827699, "step": 17202 }, { "epoch": 95.0, "eval_loss": 1.4892385005950928, "eval_runtime": 165.5064, "eval_samples_per_second": 22.108, "eval_steps_per_second": 1.384, "eval_wer": 0.8015267175572519, "step": 17385 }, { "epoch": 95.63, "learning_rate": 1.3668539325842695e-05, "loss": 0.0359, "step": 17500 }, { "epoch": 96.0, "eval_loss": 1.486182451248169, "eval_runtime": 174.5056, "eval_samples_per_second": 20.968, "eval_steps_per_second": 1.312, "eval_wer": 0.8017993456924755, "step": 17568 }, { "epoch": 97.0, "eval_loss": 1.4851171970367432, "eval_runtime": 165.0543, "eval_samples_per_second": 22.168, "eval_steps_per_second": 1.387, "eval_wer": 0.8007088331515813, "step": 17751 }, { "epoch": 98.0, "eval_loss": 1.48457932472229, "eval_runtime": 169.1905, "eval_samples_per_second": 21.627, "eval_steps_per_second": 1.354, "eval_wer": 0.7998909487459106, "step": 17934 }, { "epoch": 98.36, "learning_rate": 5.241573033707864e-06, "loss": 0.0347, "step": 18000 }, { "epoch": 99.0, "eval_loss": 1.4852174520492554, "eval_runtime": 168.792, "eval_samples_per_second": 21.678, "eval_steps_per_second": 1.357, "eval_wer": 0.7993456924754635, "step": 18117 }, { "epoch": 100.0, "eval_loss": 1.484755277633667, "eval_runtime": 179.7891, "eval_samples_per_second": 20.352, "eval_steps_per_second": 1.274, "eval_wer": 0.8004362050163577, "step": 18300 }, { "epoch": 100.0, "step": 18300, "total_flos": 1.3576650149787481e+20, "train_loss": 0.34965579027686616, "train_runtime": 66008.5183, "train_samples_per_second": 17.704, "train_steps_per_second": 0.277 } ], "max_steps": 18300, "num_train_epochs": 100, "total_flos": 1.3576650149787481e+20, "trial_name": null, "trial_params": null }