{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 18300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 47.84418869018555,
      "eval_runtime": 186.8063,
      "eval_samples_per_second": 19.587,
      "eval_steps_per_second": 1.226,
      "eval_wer": 1.0,
      "step": 183
    },
    {
      "epoch": 2.0,
      "eval_loss": 6.310945510864258,
      "eval_runtime": 163.2985,
      "eval_samples_per_second": 22.407,
      "eval_steps_per_second": 1.402,
      "eval_wer": 1.0,
      "step": 366
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.00029699999999999996,
      "loss": 41.8902,
      "step": 500
    },
    {
      "epoch": 3.0,
      "eval_loss": 6.239192008972168,
      "eval_runtime": 163.0201,
      "eval_samples_per_second": 22.445,
      "eval_steps_per_second": 1.405,
      "eval_wer": 1.0,
      "step": 549
    },
    {
      "epoch": 4.0,
      "eval_loss": 5.973925590515137,
      "eval_runtime": 163.0214,
      "eval_samples_per_second": 22.445,
      "eval_steps_per_second": 1.405,
      "eval_wer": 1.1123227917121048,
      "step": 732
    },
    {
      "epoch": 5.0,
      "eval_loss": 4.901411056518555,
      "eval_runtime": 162.8715,
      "eval_samples_per_second": 22.466,
      "eval_steps_per_second": 1.406,
      "eval_wer": 1.9473827699018538,
      "step": 915
    },
    {
      "epoch": 5.46,
      "learning_rate": 0.00018834586466165413,
      "loss": 5.5817,
      "step": 1000
    },
    {
      "epoch": 6.0,
      "eval_loss": 3.9892334938049316,
      "eval_runtime": 163.2053,
      "eval_samples_per_second": 22.42,
      "eval_steps_per_second": 1.403,
      "eval_wer": 1.0188113413304254,
      "step": 1098
    },
    {
      "epoch": 7.0,
      "eval_loss": 3.5080456733703613,
      "eval_runtime": 162.7362,
      "eval_samples_per_second": 22.484,
      "eval_steps_per_second": 1.407,
      "eval_wer": 1.0103598691384952,
      "step": 1281
    },
    {
      "epoch": 8.0,
      "eval_loss": 3.0796852111816406,
      "eval_runtime": 164.2666,
      "eval_samples_per_second": 22.275,
      "eval_steps_per_second": 1.394,
      "eval_wer": 0.9904580152671756,
      "step": 1464
    },
    {
      "epoch": 8.2,
      "learning_rate": 7.556390977443609e-05,
      "loss": 3.5579,
      "step": 1500
    },
    {
      "epoch": 9.0,
      "eval_loss": 2.8110806941986084,
      "eval_runtime": 168.541,
      "eval_samples_per_second": 21.71,
      "eval_steps_per_second": 1.359,
      "eval_wer": 0.9836423118865867,
      "step": 1647
    },
    {
      "epoch": 10.0,
      "eval_loss": 2.67260479927063,
      "eval_runtime": 165.7448,
      "eval_samples_per_second": 22.076,
      "eval_steps_per_second": 1.382,
      "eval_wer": 0.9814612868047983,
      "step": 1830
    },
    {
      "epoch": 10.93,
      "learning_rate": 0.00027480337078651684,
      "loss": 2.7771,
      "step": 2000
    },
    {
      "epoch": 11.0,
      "eval_loss": 2.7177391052246094,
      "eval_runtime": 191.8821,
      "eval_samples_per_second": 19.069,
      "eval_steps_per_second": 1.193,
      "eval_wer": 0.9809160305343512,
      "step": 2013
    },
    {
      "epoch": 12.0,
      "eval_loss": 2.3581743240356445,
      "eval_runtime": 168.9095,
      "eval_samples_per_second": 21.662,
      "eval_steps_per_second": 1.356,
      "eval_wer": 0.9691930207197382,
      "step": 2196
    },
    {
      "epoch": 13.0,
      "eval_loss": 2.1708498001098633,
      "eval_runtime": 167.2979,
      "eval_samples_per_second": 21.871,
      "eval_steps_per_second": 1.369,
      "eval_wer": 0.9757360959651036,
      "step": 2379
    },
    {
      "epoch": 13.66,
      "learning_rate": 0.000266376404494382,
      "loss": 2.3488,
      "step": 2500
    },
    {
      "epoch": 14.0,
      "eval_loss": 2.049051284790039,
      "eval_runtime": 169.536,
      "eval_samples_per_second": 21.582,
      "eval_steps_per_second": 1.351,
      "eval_wer": 0.9525627044711014,
      "step": 2562
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.8517801761627197,
      "eval_runtime": 166.9821,
      "eval_samples_per_second": 21.913,
      "eval_steps_per_second": 1.371,
      "eval_wer": 0.9378407851690295,
      "step": 2745
    },
    {
      "epoch": 16.0,
      "eval_loss": 1.6844531297683716,
      "eval_runtime": 166.3633,
      "eval_samples_per_second": 21.994,
      "eval_steps_per_second": 1.377,
      "eval_wer": 0.9285714285714286,
      "step": 2928
    },
    {
      "epoch": 16.39,
      "learning_rate": 0.00025794943820224716,
      "loss": 1.7859,
      "step": 3000
    },
    {
      "epoch": 17.0,
      "eval_loss": 1.641157627105713,
      "eval_runtime": 165.2481,
      "eval_samples_per_second": 22.142,
      "eval_steps_per_second": 1.386,
      "eval_wer": 0.9280261723009815,
      "step": 3111
    },
    {
      "epoch": 18.0,
      "eval_loss": 1.5487942695617676,
      "eval_runtime": 166.3028,
      "eval_samples_per_second": 22.002,
      "eval_steps_per_second": 1.377,
      "eval_wer": 0.9034896401308615,
      "step": 3294
    },
    {
      "epoch": 19.0,
      "eval_loss": 1.4545683860778809,
      "eval_runtime": 169.7236,
      "eval_samples_per_second": 21.559,
      "eval_steps_per_second": 1.349,
      "eval_wer": 0.9010359869138496,
      "step": 3477
    },
    {
      "epoch": 19.13,
      "learning_rate": 0.00024952247191011235,
      "loss": 1.3898,
      "step": 3500
    },
    {
      "epoch": 20.0,
      "eval_loss": 1.5146596431732178,
      "eval_runtime": 170.5824,
      "eval_samples_per_second": 21.45,
      "eval_steps_per_second": 1.342,
      "eval_wer": 0.9201199563794984,
      "step": 3660
    },
    {
      "epoch": 21.0,
      "eval_loss": 1.446706771850586,
      "eval_runtime": 164.7146,
      "eval_samples_per_second": 22.214,
      "eval_steps_per_second": 1.39,
      "eval_wer": 0.895856052344602,
      "step": 3843
    },
    {
      "epoch": 21.86,
      "learning_rate": 0.00024111235955056177,
      "loss": 1.1291,
      "step": 4000
    },
    {
      "epoch": 22.0,
      "eval_loss": 1.474255084991455,
      "eval_runtime": 166.7828,
      "eval_samples_per_second": 21.939,
      "eval_steps_per_second": 1.373,
      "eval_wer": 0.9034896401308615,
      "step": 4026
    },
    {
      "epoch": 23.0,
      "eval_loss": 1.3826948404312134,
      "eval_runtime": 165.2382,
      "eval_samples_per_second": 22.144,
      "eval_steps_per_second": 1.386,
      "eval_wer": 0.876226826608506,
      "step": 4209
    },
    {
      "epoch": 24.0,
      "eval_loss": 1.3436840772628784,
      "eval_runtime": 167.9623,
      "eval_samples_per_second": 21.785,
      "eval_steps_per_second": 1.363,
      "eval_wer": 0.8792257360959651,
      "step": 4392
    },
    {
      "epoch": 24.59,
      "learning_rate": 0.00023268539325842696,
      "loss": 0.8993,
      "step": 4500
    },
    {
      "epoch": 25.0,
      "eval_loss": 1.2894562482833862,
      "eval_runtime": 179.0778,
      "eval_samples_per_second": 20.432,
      "eval_steps_per_second": 1.279,
      "eval_wer": 0.8576881134133042,
      "step": 4575
    },
    {
      "epoch": 26.0,
      "eval_loss": 1.2927522659301758,
      "eval_runtime": 173.2382,
      "eval_samples_per_second": 21.121,
      "eval_steps_per_second": 1.322,
      "eval_wer": 0.8557797164667393,
      "step": 4758
    },
    {
      "epoch": 27.0,
      "eval_loss": 1.294653058052063,
      "eval_runtime": 166.3679,
      "eval_samples_per_second": 21.993,
      "eval_steps_per_second": 1.376,
      "eval_wer": 0.9163031624863686,
      "step": 4941
    },
    {
      "epoch": 27.32,
      "learning_rate": 0.0002242584269662921,
      "loss": 0.6298,
      "step": 5000
    },
    {
      "epoch": 28.0,
      "eval_loss": 1.3150520324707031,
      "eval_runtime": 164.8819,
      "eval_samples_per_second": 22.192,
      "eval_steps_per_second": 1.389,
      "eval_wer": 0.873773173391494,
      "step": 5124
    },
    {
      "epoch": 29.0,
      "eval_loss": 1.2971961498260498,
      "eval_runtime": 163.7658,
      "eval_samples_per_second": 22.343,
      "eval_steps_per_second": 1.398,
      "eval_wer": 0.8514176663031625,
      "step": 5307
    },
    {
      "epoch": 30.0,
      "eval_loss": 1.302983283996582,
      "eval_runtime": 162.8739,
      "eval_samples_per_second": 22.465,
      "eval_steps_per_second": 1.406,
      "eval_wer": 0.8432388222464559,
      "step": 5490
    },
    {
      "epoch": 30.05,
      "learning_rate": 0.00021583146067415728,
      "loss": 0.4757,
      "step": 5500
    },
    {
      "epoch": 31.0,
      "eval_loss": 1.3263603448867798,
      "eval_runtime": 163.1422,
      "eval_samples_per_second": 22.428,
      "eval_steps_per_second": 1.404,
      "eval_wer": 0.836423118865867,
      "step": 5673
    },
    {
      "epoch": 32.0,
      "eval_loss": 1.3130974769592285,
      "eval_runtime": 162.7813,
      "eval_samples_per_second": 22.478,
      "eval_steps_per_second": 1.407,
      "eval_wer": 0.8421483097055616,
      "step": 5856
    },
    {
      "epoch": 32.79,
      "learning_rate": 0.00020740449438202247,
      "loss": 0.3735,
      "step": 6000
    },
    {
      "epoch": 33.0,
      "eval_loss": 1.3457393646240234,
      "eval_runtime": 164.771,
      "eval_samples_per_second": 22.207,
      "eval_steps_per_second": 1.39,
      "eval_wer": 0.8587786259541985,
      "step": 6039
    },
    {
      "epoch": 34.0,
      "eval_loss": 1.3450396060943604,
      "eval_runtime": 163.6185,
      "eval_samples_per_second": 22.363,
      "eval_steps_per_second": 1.4,
      "eval_wer": 0.8473282442748091,
      "step": 6222
    },
    {
      "epoch": 35.0,
      "eval_loss": 1.3451658487319946,
      "eval_runtime": 167.7067,
      "eval_samples_per_second": 21.818,
      "eval_steps_per_second": 1.365,
      "eval_wer": 0.9217557251908397,
      "step": 6405
    },
    {
      "epoch": 35.52,
      "learning_rate": 0.0001989775280898876,
      "loss": 0.3253,
      "step": 6500
    },
    {
      "epoch": 36.0,
      "eval_loss": 1.375409722328186,
      "eval_runtime": 164.3009,
      "eval_samples_per_second": 22.27,
      "eval_steps_per_second": 1.394,
      "eval_wer": 0.8396946564885496,
      "step": 6588
    },
    {
      "epoch": 37.0,
      "eval_loss": 1.3554260730743408,
      "eval_runtime": 163.3464,
      "eval_samples_per_second": 22.4,
      "eval_steps_per_second": 1.402,
      "eval_wer": 0.8353326063249727,
      "step": 6771
    },
    {
      "epoch": 38.0,
      "eval_loss": 1.353210687637329,
      "eval_runtime": 166.4062,
      "eval_samples_per_second": 21.988,
      "eval_steps_per_second": 1.376,
      "eval_wer": 0.8312431842966194,
      "step": 6954
    },
    {
      "epoch": 38.25,
      "learning_rate": 0.0001905505617977528,
      "loss": 0.2816,
      "step": 7000
    },
    {
      "epoch": 39.0,
      "eval_loss": 1.369396686553955,
      "eval_runtime": 165.9264,
      "eval_samples_per_second": 22.052,
      "eval_steps_per_second": 1.38,
      "eval_wer": 0.8345147219193021,
      "step": 7137
    },
    {
      "epoch": 40.0,
      "eval_loss": 1.395269751548767,
      "eval_runtime": 163.891,
      "eval_samples_per_second": 22.326,
      "eval_steps_per_second": 1.397,
      "eval_wer": 0.829607415485278,
      "step": 7320
    },
    {
      "epoch": 40.98,
      "learning_rate": 0.00018212359550561795,
      "loss": 0.2397,
      "step": 7500
    },
    {
      "epoch": 41.0,
      "eval_loss": 1.385826826095581,
      "eval_runtime": 165.4232,
      "eval_samples_per_second": 22.119,
      "eval_steps_per_second": 1.384,
      "eval_wer": 0.8293347873500545,
      "step": 7503
    },
    {
      "epoch": 42.0,
      "eval_loss": 1.3958967924118042,
      "eval_runtime": 163.1457,
      "eval_samples_per_second": 22.428,
      "eval_steps_per_second": 1.404,
      "eval_wer": 0.8402399127589967,
      "step": 7686
    },
    {
      "epoch": 43.0,
      "eval_loss": 1.435033917427063,
      "eval_runtime": 163.4766,
      "eval_samples_per_second": 22.382,
      "eval_steps_per_second": 1.401,
      "eval_wer": 0.9318429661941112,
      "step": 7869
    },
    {
      "epoch": 43.72,
      "learning_rate": 0.0001737134831460674,
      "loss": 0.2084,
      "step": 8000
    },
    {
      "epoch": 44.0,
      "eval_loss": 1.4003560543060303,
      "eval_runtime": 164.4371,
      "eval_samples_per_second": 22.252,
      "eval_steps_per_second": 1.393,
      "eval_wer": 0.8805888767720829,
      "step": 8052
    },
    {
      "epoch": 45.0,
      "eval_loss": 1.387134075164795,
      "eval_runtime": 164.8129,
      "eval_samples_per_second": 22.201,
      "eval_steps_per_second": 1.389,
      "eval_wer": 0.8255179934569248,
      "step": 8235
    },
    {
      "epoch": 46.0,
      "eval_loss": 1.4059827327728271,
      "eval_runtime": 166.2017,
      "eval_samples_per_second": 22.015,
      "eval_steps_per_second": 1.378,
      "eval_wer": 0.8252453653217012,
      "step": 8418
    },
    {
      "epoch": 46.45,
      "learning_rate": 0.00016528651685393257,
      "loss": 0.1853,
      "step": 8500
    },
    {
      "epoch": 47.0,
      "eval_loss": 1.3992133140563965,
      "eval_runtime": 164.2693,
      "eval_samples_per_second": 22.274,
      "eval_steps_per_second": 1.394,
      "eval_wer": 0.8500545256270448,
      "step": 8601
    },
    {
      "epoch": 48.0,
      "eval_loss": 1.4186208248138428,
      "eval_runtime": 162.8365,
      "eval_samples_per_second": 22.47,
      "eval_steps_per_second": 1.406,
      "eval_wer": 0.8252453653217012,
      "step": 8784
    },
    {
      "epoch": 49.0,
      "eval_loss": 1.4120242595672607,
      "eval_runtime": 164.9525,
      "eval_samples_per_second": 22.182,
      "eval_steps_per_second": 1.388,
      "eval_wer": 0.8165212649945475,
      "step": 8967
    },
    {
      "epoch": 49.18,
      "learning_rate": 0.00015685955056179775,
      "loss": 0.1671,
      "step": 9000
    },
    {
      "epoch": 50.0,
      "eval_loss": 1.4165652990341187,
      "eval_runtime": 173.8761,
      "eval_samples_per_second": 21.044,
      "eval_steps_per_second": 1.317,
      "eval_wer": 0.8214285714285714,
      "step": 9150
    },
    {
      "epoch": 51.0,
      "eval_loss": 1.4411484003067017,
      "eval_runtime": 165.9975,
      "eval_samples_per_second": 22.043,
      "eval_steps_per_second": 1.38,
      "eval_wer": 0.8500545256270448,
      "step": 9333
    },
    {
      "epoch": 51.91,
      "learning_rate": 0.00014843258426966292,
      "loss": 0.1513,
      "step": 9500
    },
    {
      "epoch": 52.0,
      "eval_loss": 1.4692339897155762,
      "eval_runtime": 163.251,
      "eval_samples_per_second": 22.413,
      "eval_steps_per_second": 1.403,
      "eval_wer": 0.839422028353326,
      "step": 9516
    },
    {
      "epoch": 53.0,
      "eval_loss": 1.4640177488327026,
      "eval_runtime": 162.0066,
      "eval_samples_per_second": 22.586,
      "eval_steps_per_second": 1.414,
      "eval_wer": 0.8391494002181025,
      "step": 9699
    },
    {
      "epoch": 54.0,
      "eval_loss": 1.450060486793518,
      "eval_runtime": 165.9207,
      "eval_samples_per_second": 22.053,
      "eval_steps_per_second": 1.38,
      "eval_wer": 0.8418756815703381,
      "step": 9882
    },
    {
      "epoch": 54.64,
      "learning_rate": 0.00014002247191011234,
      "loss": 0.133,
      "step": 10000
    },
    {
      "epoch": 55.0,
      "eval_loss": 1.4133520126342773,
      "eval_runtime": 163.1361,
      "eval_samples_per_second": 22.429,
      "eval_steps_per_second": 1.404,
      "eval_wer": 0.8350599781897492,
      "step": 10065
    },
    {
      "epoch": 56.0,
      "eval_loss": 1.4592992067337036,
      "eval_runtime": 161.8579,
      "eval_samples_per_second": 22.606,
      "eval_steps_per_second": 1.415,
      "eval_wer": 0.8405125408942202,
      "step": 10248
    },
    {
      "epoch": 57.0,
      "eval_loss": 1.45597505569458,
      "eval_runtime": 161.821,
      "eval_samples_per_second": 22.611,
      "eval_steps_per_second": 1.415,
      "eval_wer": 0.8388767720828789,
      "step": 10431
    },
    {
      "epoch": 57.38,
      "learning_rate": 0.00013159550561797753,
      "loss": 0.1198,
      "step": 10500
    },
    {
      "epoch": 58.0,
      "eval_loss": 1.4733554124832153,
      "eval_runtime": 162.1949,
      "eval_samples_per_second": 22.559,
      "eval_steps_per_second": 1.412,
      "eval_wer": 0.8334242093784079,
      "step": 10614
    },
    {
      "epoch": 59.0,
      "eval_loss": 1.4649208784103394,
      "eval_runtime": 162.1709,
      "eval_samples_per_second": 22.563,
      "eval_steps_per_second": 1.412,
      "eval_wer": 0.8317884405670665,
      "step": 10797
    },
    {
      "epoch": 60.0,
      "eval_loss": 1.4659123420715332,
      "eval_runtime": 163.9506,
      "eval_samples_per_second": 22.318,
      "eval_steps_per_second": 1.397,
      "eval_wer": 0.8099781897491821,
      "step": 10980
    },
    {
      "epoch": 60.11,
      "learning_rate": 0.0001231685393258427,
      "loss": 0.1109,
      "step": 11000
    },
    {
      "epoch": 61.0,
      "eval_loss": 1.4783909320831299,
      "eval_runtime": 165.1461,
      "eval_samples_per_second": 22.156,
      "eval_steps_per_second": 1.387,
      "eval_wer": 0.811886586695747,
      "step": 11163
    },
    {
      "epoch": 62.0,
      "eval_loss": 1.493830680847168,
      "eval_runtime": 164.5944,
      "eval_samples_per_second": 22.23,
      "eval_steps_per_second": 1.391,
      "eval_wer": 0.8148854961832062,
      "step": 11346
    },
    {
      "epoch": 62.84,
      "learning_rate": 0.00011474157303370785,
      "loss": 0.1063,
      "step": 11500
    },
    {
      "epoch": 63.0,
      "eval_loss": 1.5050164461135864,
      "eval_runtime": 163.3536,
      "eval_samples_per_second": 22.399,
      "eval_steps_per_second": 1.402,
      "eval_wer": 0.8151581243184297,
      "step": 11529
    },
    {
      "epoch": 64.0,
      "eval_loss": 1.4773460626602173,
      "eval_runtime": 168.4977,
      "eval_samples_per_second": 21.715,
      "eval_steps_per_second": 1.359,
      "eval_wer": 0.8176117775354417,
      "step": 11712
    },
    {
      "epoch": 65.0,
      "eval_loss": 1.4835622310638428,
      "eval_runtime": 166.8711,
      "eval_samples_per_second": 21.927,
      "eval_steps_per_second": 1.372,
      "eval_wer": 0.8260632497273719,
      "step": 11895
    },
    {
      "epoch": 65.57,
      "learning_rate": 0.00010631460674157301,
      "loss": 0.0966,
      "step": 12000
    },
    {
      "epoch": 66.0,
      "eval_loss": 1.4978560209274292,
      "eval_runtime": 162.3032,
      "eval_samples_per_second": 22.544,
      "eval_steps_per_second": 1.411,
      "eval_wer": 0.8157033805888768,
      "step": 12078
    },
    {
      "epoch": 67.0,
      "eval_loss": 1.4603493213653564,
      "eval_runtime": 162.8062,
      "eval_samples_per_second": 22.475,
      "eval_steps_per_second": 1.407,
      "eval_wer": 0.8047982551799345,
      "step": 12261
    },
    {
      "epoch": 68.0,
      "eval_loss": 1.4802541732788086,
      "eval_runtime": 169.4775,
      "eval_samples_per_second": 21.59,
      "eval_steps_per_second": 1.351,
      "eval_wer": 0.8127044711014176,
      "step": 12444
    },
    {
      "epoch": 68.31,
      "learning_rate": 9.790449438202247e-05,
      "loss": 0.0867,
      "step": 12500
    },
    {
      "epoch": 69.0,
      "eval_loss": 1.4973595142364502,
      "eval_runtime": 164.1372,
      "eval_samples_per_second": 22.292,
      "eval_steps_per_second": 1.395,
      "eval_wer": 0.8129770992366412,
      "step": 12627
    },
    {
      "epoch": 70.0,
      "eval_loss": 1.4721262454986572,
      "eval_runtime": 163.536,
      "eval_samples_per_second": 22.374,
      "eval_steps_per_second": 1.4,
      "eval_wer": 0.8077971646673937,
      "step": 12810
    },
    {
      "epoch": 71.0,
      "eval_loss": 1.4643745422363281,
      "eval_runtime": 166.8819,
      "eval_samples_per_second": 21.926,
      "eval_steps_per_second": 1.372,
      "eval_wer": 0.819247546346783,
      "step": 12993
    },
    {
      "epoch": 71.04,
      "learning_rate": 8.947752808988763e-05,
      "loss": 0.0827,
      "step": 13000
    },
    {
      "epoch": 72.0,
      "eval_loss": 1.4834694862365723,
      "eval_runtime": 167.6508,
      "eval_samples_per_second": 21.825,
      "eval_steps_per_second": 1.366,
      "eval_wer": 0.8137949836423118,
      "step": 13176
    },
    {
      "epoch": 73.0,
      "eval_loss": 1.4933878183364868,
      "eval_runtime": 170.3063,
      "eval_samples_per_second": 21.485,
      "eval_steps_per_second": 1.345,
      "eval_wer": 0.8121592148309705,
      "step": 13359
    },
    {
      "epoch": 73.77,
      "learning_rate": 8.10505617977528e-05,
      "loss": 0.0734,
      "step": 13500
    },
    {
      "epoch": 74.0,
      "eval_loss": 1.4950696229934692,
      "eval_runtime": 175.7645,
      "eval_samples_per_second": 20.818,
      "eval_steps_per_second": 1.303,
      "eval_wer": 0.8061613958560524,
      "step": 13542
    },
    {
      "epoch": 75.0,
      "eval_loss": 1.490771770477295,
      "eval_runtime": 168.0913,
      "eval_samples_per_second": 21.768,
      "eval_steps_per_second": 1.362,
      "eval_wer": 0.806979280261723,
      "step": 13725
    },
    {
      "epoch": 76.0,
      "eval_loss": 1.4876092672348022,
      "eval_runtime": 166.128,
      "eval_samples_per_second": 22.025,
      "eval_steps_per_second": 1.378,
      "eval_wer": 0.8124318429661941,
      "step": 13908
    },
    {
      "epoch": 76.5,
      "learning_rate": 7.262359550561797e-05,
      "loss": 0.0664,
      "step": 14000
    },
    {
      "epoch": 77.0,
      "eval_loss": 1.493386149406433,
      "eval_runtime": 166.8817,
      "eval_samples_per_second": 21.926,
      "eval_steps_per_second": 1.372,
      "eval_wer": 0.8053435114503816,
      "step": 14091
    },
    {
      "epoch": 78.0,
      "eval_loss": 1.4603490829467773,
      "eval_runtime": 169.3203,
      "eval_samples_per_second": 21.61,
      "eval_steps_per_second": 1.352,
      "eval_wer": 0.8047982551799345,
      "step": 14274
    },
    {
      "epoch": 79.0,
      "eval_loss": 1.4732308387756348,
      "eval_runtime": 165.2553,
      "eval_samples_per_second": 22.142,
      "eval_steps_per_second": 1.386,
      "eval_wer": 0.8072519083969466,
      "step": 14457
    },
    {
      "epoch": 79.23,
      "learning_rate": 6.42134831460674e-05,
      "loss": 0.0602,
      "step": 14500
    },
    {
      "epoch": 80.0,
      "eval_loss": 1.4924767017364502,
      "eval_runtime": 166.2646,
      "eval_samples_per_second": 22.007,
      "eval_steps_per_second": 1.377,
      "eval_wer": 0.8077971646673937,
      "step": 14640
    },
    {
      "epoch": 81.0,
      "eval_loss": 1.4812291860580444,
      "eval_runtime": 166.8878,
      "eval_samples_per_second": 21.925,
      "eval_steps_per_second": 1.372,
      "eval_wer": 0.806434023991276,
      "step": 14823
    },
    {
      "epoch": 81.97,
      "learning_rate": 5.578651685393258e-05,
      "loss": 0.057,
      "step": 15000
    },
    {
      "epoch": 82.0,
      "eval_loss": 1.4949839115142822,
      "eval_runtime": 166.616,
      "eval_samples_per_second": 21.961,
      "eval_steps_per_second": 1.374,
      "eval_wer": 0.8012540894220284,
      "step": 15006
    },
    {
      "epoch": 83.0,
      "eval_loss": 1.4784878492355347,
      "eval_runtime": 172.4918,
      "eval_samples_per_second": 21.213,
      "eval_steps_per_second": 1.328,
      "eval_wer": 0.8056161395856052,
      "step": 15189
    },
    {
      "epoch": 84.0,
      "eval_loss": 1.485625982284546,
      "eval_runtime": 165.5579,
      "eval_samples_per_second": 22.101,
      "eval_steps_per_second": 1.383,
      "eval_wer": 0.7993456924754635,
      "step": 15372
    },
    {
      "epoch": 84.7,
      "learning_rate": 4.735955056179775e-05,
      "loss": 0.0517,
      "step": 15500
    },
    {
      "epoch": 85.0,
      "eval_loss": 1.4754849672317505,
      "eval_runtime": 168.7909,
      "eval_samples_per_second": 21.678,
      "eval_steps_per_second": 1.357,
      "eval_wer": 0.8034351145038168,
      "step": 15555
    },
    {
      "epoch": 86.0,
      "eval_loss": 1.4813350439071655,
      "eval_runtime": 168.272,
      "eval_samples_per_second": 21.745,
      "eval_steps_per_second": 1.361,
      "eval_wer": 0.8034351145038168,
      "step": 15738
    },
    {
      "epoch": 87.0,
      "eval_loss": 1.4965763092041016,
      "eval_runtime": 166.9884,
      "eval_samples_per_second": 21.912,
      "eval_steps_per_second": 1.371,
      "eval_wer": 0.8047982551799345,
      "step": 15921
    },
    {
      "epoch": 87.43,
      "learning_rate": 3.893258426966292e-05,
      "loss": 0.0468,
      "step": 16000
    },
    {
      "epoch": 88.0,
      "eval_loss": 1.4883418083190918,
      "eval_runtime": 166.1387,
      "eval_samples_per_second": 22.024,
      "eval_steps_per_second": 1.378,
      "eval_wer": 0.8001635768811342,
      "step": 16104
    },
    {
      "epoch": 89.0,
      "eval_loss": 1.4746148586273193,
      "eval_runtime": 165.9654,
      "eval_samples_per_second": 22.047,
      "eval_steps_per_second": 1.38,
      "eval_wer": 0.8023446019629226,
      "step": 16287
    },
    {
      "epoch": 90.0,
      "eval_loss": 1.4697260856628418,
      "eval_runtime": 166.9567,
      "eval_samples_per_second": 21.916,
      "eval_steps_per_second": 1.372,
      "eval_wer": 0.7974372955288986,
      "step": 16470
    },
    {
      "epoch": 90.16,
      "learning_rate": 3.0505617977528088e-05,
      "loss": 0.0426,
      "step": 16500
    },
    {
      "epoch": 91.0,
      "eval_loss": 1.4775140285491943,
      "eval_runtime": 165.779,
      "eval_samples_per_second": 22.072,
      "eval_steps_per_second": 1.381,
      "eval_wer": 0.8004362050163577,
      "step": 16653
    },
    {
      "epoch": 92.0,
      "eval_loss": 1.4852207899093628,
      "eval_runtime": 173.415,
      "eval_samples_per_second": 21.1,
      "eval_steps_per_second": 1.321,
      "eval_wer": 0.8023446019629226,
      "step": 16836
    },
    {
      "epoch": 92.9,
      "learning_rate": 2.2078651685393255e-05,
      "loss": 0.0387,
      "step": 17000
    },
    {
      "epoch": 93.0,
      "eval_loss": 1.4868098497390747,
      "eval_runtime": 166.0768,
      "eval_samples_per_second": 22.032,
      "eval_steps_per_second": 1.379,
      "eval_wer": 0.8004362050163577,
      "step": 17019
    },
    {
      "epoch": 94.0,
      "eval_loss": 1.47845458984375,
      "eval_runtime": 171.2193,
      "eval_samples_per_second": 21.37,
      "eval_steps_per_second": 1.337,
      "eval_wer": 0.802071973827699,
      "step": 17202
    },
    {
      "epoch": 95.0,
      "eval_loss": 1.4892385005950928,
      "eval_runtime": 165.5064,
      "eval_samples_per_second": 22.108,
      "eval_steps_per_second": 1.384,
      "eval_wer": 0.8015267175572519,
      "step": 17385
    },
    {
      "epoch": 95.63,
      "learning_rate": 1.3668539325842695e-05,
      "loss": 0.0359,
      "step": 17500
    },
    {
      "epoch": 96.0,
      "eval_loss": 1.486182451248169,
      "eval_runtime": 174.5056,
      "eval_samples_per_second": 20.968,
      "eval_steps_per_second": 1.312,
      "eval_wer": 0.8017993456924755,
      "step": 17568
    },
    {
      "epoch": 97.0,
      "eval_loss": 1.4851171970367432,
      "eval_runtime": 165.0543,
      "eval_samples_per_second": 22.168,
      "eval_steps_per_second": 1.387,
      "eval_wer": 0.8007088331515813,
      "step": 17751
    },
    {
      "epoch": 98.0,
      "eval_loss": 1.48457932472229,
      "eval_runtime": 169.1905,
      "eval_samples_per_second": 21.627,
      "eval_steps_per_second": 1.354,
      "eval_wer": 0.7998909487459106,
      "step": 17934
    },
    {
      "epoch": 98.36,
      "learning_rate": 5.241573033707864e-06,
      "loss": 0.0347,
      "step": 18000
    },
    {
      "epoch": 99.0,
      "eval_loss": 1.4852174520492554,
      "eval_runtime": 168.792,
      "eval_samples_per_second": 21.678,
      "eval_steps_per_second": 1.357,
      "eval_wer": 0.7993456924754635,
      "step": 18117
    },
    {
      "epoch": 100.0,
      "eval_loss": 1.484755277633667,
      "eval_runtime": 179.7891,
      "eval_samples_per_second": 20.352,
      "eval_steps_per_second": 1.274,
      "eval_wer": 0.8004362050163577,
      "step": 18300
    },
    {
      "epoch": 100.0,
      "step": 18300,
      "total_flos": 1.3576650149787481e+20,
      "train_loss": 0.34965579027686616,
      "train_runtime": 66008.5183,
      "train_samples_per_second": 17.704,
      "train_steps_per_second": 0.277
    }
  ],
  "max_steps": 18300,
  "num_train_epochs": 100,
  "total_flos": 1.3576650149787481e+20,
  "trial_name": null,
  "trial_params": null
}