{ |
|
"best_metric": 0.09492826159492826, |
|
"best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.99_g1.0-0.05_10_0.004_40/checkpoint-4450", |
|
"epoch": 100.0, |
|
"eval_steps": 50, |
|
"global_step": 5300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 7.169811320754717e-07, |
|
"loss": 2181.8592, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1087.6209716796875, |
|
"eval_runtime": 3.509, |
|
"eval_samples_per_second": 204.334, |
|
"eval_steps_per_second": 6.555, |
|
"eval_wer": 15.942025358692025, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.5660377358490568e-06, |
|
"loss": 1908.6856, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 809.7703247070312, |
|
"eval_runtime": 2.575, |
|
"eval_samples_per_second": 278.445, |
|
"eval_steps_per_second": 8.932, |
|
"eval_wer": 15.877293960627293, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.452830188679246e-06, |
|
"loss": 838.4017, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 112.64669036865234, |
|
"eval_runtime": 2.3442, |
|
"eval_samples_per_second": 305.859, |
|
"eval_steps_per_second": 9.811, |
|
"eval_wer": 0.9996663329996663, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 6.339622641509434e-06, |
|
"loss": 117.7945, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 85.67916107177734, |
|
"eval_runtime": 2.4505, |
|
"eval_samples_per_second": 292.59, |
|
"eval_steps_per_second": 9.386, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 8.226415094339623e-06, |
|
"loss": 109.9946, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 82.57705688476562, |
|
"eval_runtime": 2.3658, |
|
"eval_samples_per_second": 303.071, |
|
"eval_steps_per_second": 9.722, |
|
"eval_wer": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.0113207547169812e-05, |
|
"loss": 105.7306, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 79.65999603271484, |
|
"eval_runtime": 2.4085, |
|
"eval_samples_per_second": 297.69, |
|
"eval_steps_per_second": 9.549, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.2e-05, |
|
"loss": 102.0127, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 77.22874450683594, |
|
"eval_runtime": 2.2958, |
|
"eval_samples_per_second": 312.304, |
|
"eval_steps_per_second": 10.018, |
|
"eval_wer": 1.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.3886792452830189e-05, |
|
"loss": 97.9428, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 75.43338012695312, |
|
"eval_runtime": 2.2511, |
|
"eval_samples_per_second": 318.513, |
|
"eval_steps_per_second": 10.217, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 1.577358490566038e-05, |
|
"loss": 96.0055, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 74.68699645996094, |
|
"eval_runtime": 2.3966, |
|
"eval_samples_per_second": 299.18, |
|
"eval_steps_per_second": 9.597, |
|
"eval_wer": 1.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 1.766037735849057e-05, |
|
"loss": 96.9376, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 74.24928283691406, |
|
"eval_runtime": 2.3235, |
|
"eval_samples_per_second": 308.593, |
|
"eval_steps_per_second": 9.899, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 1.9547169811320757e-05, |
|
"loss": 95.6634, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"eval_loss": 74.1340560913086, |
|
"eval_runtime": 2.2186, |
|
"eval_samples_per_second": 323.18, |
|
"eval_steps_per_second": 10.367, |
|
"eval_wer": 1.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 1.9996868319012422e-05, |
|
"loss": 96.1578, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_loss": 74.90034484863281, |
|
"eval_runtime": 2.3308, |
|
"eval_samples_per_second": 307.622, |
|
"eval_steps_per_second": 9.868, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 1.9983208992285993e-05, |
|
"loss": 92.5678, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 75.66032409667969, |
|
"eval_runtime": 2.35, |
|
"eval_samples_per_second": 305.104, |
|
"eval_steps_per_second": 9.787, |
|
"eval_wer": 1.0598098098098099, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 1.9958724515842856e-05, |
|
"loss": 90.5927, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_loss": 73.4554672241211, |
|
"eval_runtime": 2.3862, |
|
"eval_samples_per_second": 300.482, |
|
"eval_steps_per_second": 9.639, |
|
"eval_wer": 1.0538872205538872, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1.9923441439074434e-05, |
|
"loss": 87.8965, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 72.41024017333984, |
|
"eval_runtime": 2.3682, |
|
"eval_samples_per_second": 302.757, |
|
"eval_steps_per_second": 9.712, |
|
"eval_wer": 0.9986653319986654, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.9877398020679796e-05, |
|
"loss": 86.8467, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_loss": 69.77374267578125, |
|
"eval_runtime": 2.3633, |
|
"eval_samples_per_second": 303.392, |
|
"eval_steps_per_second": 9.732, |
|
"eval_wer": 0.9984150817484151, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 1.9820644187180354e-05, |
|
"loss": 85.3381, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"eval_loss": 67.84330749511719, |
|
"eval_runtime": 2.282, |
|
"eval_samples_per_second": 314.204, |
|
"eval_steps_per_second": 10.079, |
|
"eval_wer": 0.9717217217217218, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 1.975324147878278e-05, |
|
"loss": 80.3298, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_loss": 52.40813064575195, |
|
"eval_runtime": 2.2309, |
|
"eval_samples_per_second": 321.398, |
|
"eval_steps_per_second": 10.31, |
|
"eval_wer": 0.8594427761094428, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 1.9675262982648757e-05, |
|
"loss": 56.9494, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_loss": 25.267763137817383, |
|
"eval_runtime": 2.3806, |
|
"eval_samples_per_second": 301.181, |
|
"eval_steps_per_second": 9.661, |
|
"eval_wer": 0.3554387721054388, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 1.958679325364396e-05, |
|
"loss": 32.292, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"eval_loss": 14.86340618133545, |
|
"eval_runtime": 2.2474, |
|
"eval_samples_per_second": 319.039, |
|
"eval_steps_per_second": 10.234, |
|
"eval_wer": 0.21896896896896897, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.9487928222652195e-05, |
|
"loss": 22.3255, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_loss": 11.289799690246582, |
|
"eval_runtime": 2.4098, |
|
"eval_samples_per_second": 297.529, |
|
"eval_steps_per_second": 9.544, |
|
"eval_wer": 0.182349015682349, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"learning_rate": 1.9378775092554124e-05, |
|
"loss": 17.6187, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"eval_loss": 9.138720512390137, |
|
"eval_runtime": 2.3047, |
|
"eval_samples_per_second": 311.103, |
|
"eval_steps_per_second": 9.98, |
|
"eval_wer": 0.15340340340340342, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 1.925945222198336e-05, |
|
"loss": 15.1531, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"eval_loss": 7.663585662841797, |
|
"eval_runtime": 2.3252, |
|
"eval_samples_per_second": 308.358, |
|
"eval_steps_per_second": 9.892, |
|
"eval_wer": 0.13680347013680347, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 1.9130088996985967e-05, |
|
"loss": 13.1696, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"eval_loss": 7.029110908508301, |
|
"eval_runtime": 2.4316, |
|
"eval_samples_per_second": 294.871, |
|
"eval_steps_per_second": 9.459, |
|
"eval_wer": 0.14339339339339338, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 1.8990825690722557e-05, |
|
"loss": 11.9792, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"eval_loss": 6.686671257019043, |
|
"eval_runtime": 2.3989, |
|
"eval_samples_per_second": 298.889, |
|
"eval_steps_per_second": 9.588, |
|
"eval_wer": 0.1324657991324658, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 1.8841813311365105e-05, |
|
"loss": 11.2404, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"eval_loss": 6.294769287109375, |
|
"eval_runtime": 2.3601, |
|
"eval_samples_per_second": 303.796, |
|
"eval_steps_per_second": 9.745, |
|
"eval_wer": 0.12128795462128796, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"learning_rate": 1.868321343835339e-05, |
|
"loss": 10.6256, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"eval_loss": 5.715055465698242, |
|
"eval_runtime": 2.2982, |
|
"eval_samples_per_second": 311.988, |
|
"eval_steps_per_second": 10.008, |
|
"eval_wer": 0.1180347013680347, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"learning_rate": 1.8515198047188652e-05, |
|
"loss": 9.452, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"eval_loss": 5.419599533081055, |
|
"eval_runtime": 2.4076, |
|
"eval_samples_per_second": 297.802, |
|
"eval_steps_per_second": 9.553, |
|
"eval_wer": 0.1175342008675342, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"learning_rate": 1.833794932295441e-05, |
|
"loss": 9.3087, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"eval_loss": 5.292884826660156, |
|
"eval_runtime": 2.3017, |
|
"eval_samples_per_second": 311.506, |
|
"eval_steps_per_second": 9.993, |
|
"eval_wer": 0.11244577911244578, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 1.8151659462766685e-05, |
|
"loss": 8.5149, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"eval_loss": 5.13940954208374, |
|
"eval_runtime": 2.4233, |
|
"eval_samples_per_second": 295.882, |
|
"eval_steps_per_second": 9.491, |
|
"eval_wer": 0.11628294961628295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 1.7956530467367805e-05, |
|
"loss": 8.3662, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"eval_loss": 5.127543926239014, |
|
"eval_runtime": 2.3508, |
|
"eval_samples_per_second": 304.996, |
|
"eval_steps_per_second": 9.784, |
|
"eval_wer": 0.12128795462128796, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"learning_rate": 1.7752773922089784e-05, |
|
"loss": 7.8852, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"eval_loss": 4.903261184692383, |
|
"eval_runtime": 2.4091, |
|
"eval_samples_per_second": 297.622, |
|
"eval_steps_per_second": 9.547, |
|
"eval_wer": 0.10927594260927594, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"learning_rate": 1.7540610767424813e-05, |
|
"loss": 7.5135, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"eval_loss": 4.957184314727783, |
|
"eval_runtime": 2.4012, |
|
"eval_samples_per_second": 298.606, |
|
"eval_steps_per_second": 9.579, |
|
"eval_wer": 0.10969302635969302, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"learning_rate": 1.7320271059451597e-05, |
|
"loss": 7.5374, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"eval_loss": 4.758788108825684, |
|
"eval_runtime": 2.4229, |
|
"eval_samples_per_second": 295.923, |
|
"eval_steps_per_second": 9.493, |
|
"eval_wer": 0.1016016016016016, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 1.7091993720377336e-05, |
|
"loss": 7.2968, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_loss": 4.7317328453063965, |
|
"eval_runtime": 2.2458, |
|
"eval_samples_per_second": 319.258, |
|
"eval_steps_per_second": 10.241, |
|
"eval_wer": 0.10326993660326994, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"learning_rate": 1.685602627946584e-05, |
|
"loss": 7.0861, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"eval_loss": 4.791558265686035, |
|
"eval_runtime": 2.2472, |
|
"eval_samples_per_second": 319.068, |
|
"eval_steps_per_second": 10.235, |
|
"eval_wer": 0.10869202535869203, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"learning_rate": 1.661262460463274e-05, |
|
"loss": 6.6371, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"eval_loss": 4.7941060066223145, |
|
"eval_runtime": 2.2988, |
|
"eval_samples_per_second": 311.908, |
|
"eval_steps_per_second": 10.005, |
|
"eval_wer": 0.11319652986319653, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"learning_rate": 1.6362052624998767e-05, |
|
"loss": 6.6186, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"eval_loss": 4.660822868347168, |
|
"eval_runtime": 2.4244, |
|
"eval_samples_per_second": 295.746, |
|
"eval_steps_per_second": 9.487, |
|
"eval_wer": 0.1036036036036036, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"learning_rate": 1.6104582044701983e-05, |
|
"loss": 6.6288, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"eval_loss": 4.679046154022217, |
|
"eval_runtime": 2.2429, |
|
"eval_samples_per_second": 319.68, |
|
"eval_steps_per_second": 10.255, |
|
"eval_wer": 0.10744077410744077, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"learning_rate": 1.584049204827929e-05, |
|
"loss": 6.2433, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"eval_loss": 4.77145528793335, |
|
"eval_runtime": 2.2791, |
|
"eval_samples_per_second": 314.598, |
|
"eval_steps_per_second": 10.092, |
|
"eval_wer": 0.11211211211211211, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"learning_rate": 1.5570068997936686e-05, |
|
"loss": 6.2362, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"eval_loss": 4.6420440673828125, |
|
"eval_runtime": 2.4168, |
|
"eval_samples_per_second": 296.676, |
|
"eval_steps_per_second": 9.517, |
|
"eval_wer": 0.10343677010343677, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"learning_rate": 1.5293606123036508e-05, |
|
"loss": 5.957, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"eval_loss": 4.575562953948975, |
|
"eval_runtime": 2.4332, |
|
"eval_samples_per_second": 294.67, |
|
"eval_steps_per_second": 9.452, |
|
"eval_wer": 0.10702369035702369, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"learning_rate": 1.5011403202138346e-05, |
|
"loss": 5.8034, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"eval_loss": 4.411165714263916, |
|
"eval_runtime": 2.3583, |
|
"eval_samples_per_second": 304.027, |
|
"eval_steps_per_second": 9.753, |
|
"eval_wer": 0.10602268935602269, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"learning_rate": 1.4723766237938495e-05, |
|
"loss": 5.4943, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"eval_loss": 4.563217639923096, |
|
"eval_runtime": 2.4228, |
|
"eval_samples_per_second": 295.938, |
|
"eval_steps_per_second": 9.493, |
|
"eval_wer": 0.10335335335335336, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"learning_rate": 1.4431007125460274e-05, |
|
"loss": 5.5593, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"eval_loss": 4.537557601928711, |
|
"eval_runtime": 2.4026, |
|
"eval_samples_per_second": 298.421, |
|
"eval_steps_per_second": 9.573, |
|
"eval_wer": 0.11052719386052719, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 1.4133443313855155e-05, |
|
"loss": 5.3447, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"eval_loss": 4.54231071472168, |
|
"eval_runtime": 2.2971, |
|
"eval_samples_per_second": 312.127, |
|
"eval_steps_per_second": 10.012, |
|
"eval_wer": 0.1006006006006006, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"learning_rate": 1.3831397462181298e-05, |
|
"loss": 5.4181, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"eval_loss": 4.378854274749756, |
|
"eval_runtime": 2.3724, |
|
"eval_samples_per_second": 302.223, |
|
"eval_steps_per_second": 9.695, |
|
"eval_wer": 0.09926593259926593, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"learning_rate": 1.3525197089532833e-05, |
|
"loss": 5.222, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"eval_loss": 4.369490146636963, |
|
"eval_runtime": 2.2903, |
|
"eval_samples_per_second": 313.058, |
|
"eval_steps_per_second": 10.042, |
|
"eval_wer": 0.1031031031031031, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"learning_rate": 1.3215174219899224e-05, |
|
"loss": 5.1146, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"eval_loss": 4.410806179046631, |
|
"eval_runtime": 2.3034, |
|
"eval_samples_per_second": 311.283, |
|
"eval_steps_per_second": 9.985, |
|
"eval_wer": 0.10844177510844177, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"learning_rate": 1.2901665022139796e-05, |
|
"loss": 5.0952, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"eval_loss": 4.295694828033447, |
|
"eval_runtime": 2.2425, |
|
"eval_samples_per_second": 319.732, |
|
"eval_steps_per_second": 10.256, |
|
"eval_wer": 0.1016016016016016, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"learning_rate": 1.2585009445463867e-05, |
|
"loss": 4.9023, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"eval_loss": 4.3768768310546875, |
|
"eval_runtime": 2.4393, |
|
"eval_samples_per_second": 293.942, |
|
"eval_steps_per_second": 9.429, |
|
"eval_wer": 0.1021021021021021, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"learning_rate": 1.2265550850811663e-05, |
|
"loss": 5.1633, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"eval_loss": 4.36325216293335, |
|
"eval_runtime": 2.3579, |
|
"eval_samples_per_second": 304.084, |
|
"eval_steps_per_second": 9.754, |
|
"eval_wer": 0.10627293960627295, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.1943635638535827e-05, |
|
"loss": 4.9489, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 4.3422441482543945, |
|
"eval_runtime": 2.2967, |
|
"eval_samples_per_second": 312.186, |
|
"eval_steps_per_second": 10.014, |
|
"eval_wer": 0.10452118785452119, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 50.94, |
|
"learning_rate": 1.1619612872787144e-05, |
|
"loss": 4.7391, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 50.94, |
|
"eval_loss": 4.251036643981934, |
|
"eval_runtime": 2.3762, |
|
"eval_samples_per_second": 301.737, |
|
"eval_steps_per_second": 9.679, |
|
"eval_wer": 0.10293626960293627, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"learning_rate": 1.1293833903011819e-05, |
|
"loss": 4.7996, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"eval_loss": 4.3253912925720215, |
|
"eval_runtime": 2.3965, |
|
"eval_samples_per_second": 299.183, |
|
"eval_steps_per_second": 9.597, |
|
"eval_wer": 0.10118451785118451, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"learning_rate": 1.0966651982970757e-05, |
|
"loss": 4.244, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"eval_loss": 4.41210412979126, |
|
"eval_runtime": 2.392, |
|
"eval_samples_per_second": 299.749, |
|
"eval_steps_per_second": 9.615, |
|
"eval_wer": 0.10352018685352019, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"learning_rate": 1.0638421887693887e-05, |
|
"loss": 4.5831, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"eval_loss": 4.405577182769775, |
|
"eval_runtime": 2.3528, |
|
"eval_samples_per_second": 304.748, |
|
"eval_steps_per_second": 9.776, |
|
"eval_wer": 0.10443777110443778, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"learning_rate": 1.0309499528784948e-05, |
|
"loss": 4.5198, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"eval_loss": 4.363803386688232, |
|
"eval_runtime": 2.3305, |
|
"eval_samples_per_second": 307.658, |
|
"eval_steps_per_second": 9.869, |
|
"eval_wer": 0.1050216883550217, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 55.66, |
|
"learning_rate": 9.980241568493834e-06, |
|
"loss": 4.1964, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 55.66, |
|
"eval_loss": 4.339745044708252, |
|
"eval_runtime": 2.3557, |
|
"eval_samples_per_second": 304.369, |
|
"eval_steps_per_second": 9.764, |
|
"eval_wer": 0.10710710710710711, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"learning_rate": 9.651005032974994e-06, |
|
"loss": 4.0544, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"eval_loss": 4.349318981170654, |
|
"eval_runtime": 2.4132, |
|
"eval_samples_per_second": 297.115, |
|
"eval_steps_per_second": 9.531, |
|
"eval_wer": 0.1031031031031031, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 57.55, |
|
"learning_rate": 9.322146925151226e-06, |
|
"loss": 4.3568, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 57.55, |
|
"eval_loss": 4.472135066986084, |
|
"eval_runtime": 2.3982, |
|
"eval_samples_per_second": 298.97, |
|
"eval_steps_per_second": 9.59, |
|
"eval_wer": 0.10593927260593927, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"learning_rate": 8.994023837602694e-06, |
|
"loss": 4.2692, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"eval_loss": 4.427754878997803, |
|
"eval_runtime": 2.4471, |
|
"eval_samples_per_second": 292.994, |
|
"eval_steps_per_second": 9.399, |
|
"eval_wer": 0.11169502836169502, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"learning_rate": 8.666991565900827e-06, |
|
"loss": 4.1226, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"eval_loss": 4.308145523071289, |
|
"eval_runtime": 2.3553, |
|
"eval_samples_per_second": 304.422, |
|
"eval_steps_per_second": 9.765, |
|
"eval_wer": 0.10035035035035035, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 60.38, |
|
"learning_rate": 8.341404722806525e-06, |
|
"loss": 4.2681, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 60.38, |
|
"eval_loss": 4.417555332183838, |
|
"eval_runtime": 2.2916, |
|
"eval_samples_per_second": 312.876, |
|
"eval_steps_per_second": 10.036, |
|
"eval_wer": 0.10585585585585586, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"learning_rate": 8.017616353750874e-06, |
|
"loss": 3.8412, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"eval_loss": 4.321342945098877, |
|
"eval_runtime": 2.3267, |
|
"eval_samples_per_second": 308.163, |
|
"eval_steps_per_second": 9.885, |
|
"eval_wer": 0.10276943610276944, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 62.26, |
|
"learning_rate": 7.695977554015387e-06, |
|
"loss": 4.1387, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 62.26, |
|
"eval_loss": 4.341909408569336, |
|
"eval_runtime": 2.2567, |
|
"eval_samples_per_second": 317.721, |
|
"eval_steps_per_second": 10.192, |
|
"eval_wer": 0.10560560560560561, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"learning_rate": 7.376837088026863e-06, |
|
"loss": 3.6847, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"eval_loss": 4.249768257141113, |
|
"eval_runtime": 2.4172, |
|
"eval_samples_per_second": 296.628, |
|
"eval_steps_per_second": 9.515, |
|
"eval_wer": 0.10652318985652319, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"learning_rate": 7.0605410111796855e-06, |
|
"loss": 3.8768, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"eval_loss": 4.277639389038086, |
|
"eval_runtime": 2.4245, |
|
"eval_samples_per_second": 295.733, |
|
"eval_steps_per_second": 9.487, |
|
"eval_wer": 0.10276943610276944, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"learning_rate": 6.7536610307503735e-06, |
|
"loss": 3.659, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"eval_loss": 4.298828125, |
|
"eval_runtime": 2.4088, |
|
"eval_samples_per_second": 297.656, |
|
"eval_steps_per_second": 9.548, |
|
"eval_wer": 0.10076743410076744, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"learning_rate": 6.444005348764207e-06, |
|
"loss": 3.809, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"eval_loss": 4.304114818572998, |
|
"eval_runtime": 2.3018, |
|
"eval_samples_per_second": 311.5, |
|
"eval_steps_per_second": 9.992, |
|
"eval_wer": 0.10343677010343677, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"learning_rate": 6.138205558658212e-06, |
|
"loss": 3.7459, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"eval_loss": 4.295498847961426, |
|
"eval_runtime": 2.3818, |
|
"eval_samples_per_second": 301.033, |
|
"eval_steps_per_second": 9.657, |
|
"eval_wer": 0.09951618284951619, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"learning_rate": 5.83659325005591e-06, |
|
"loss": 3.7996, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"eval_loss": 4.284261703491211, |
|
"eval_runtime": 2.241, |
|
"eval_samples_per_second": 319.944, |
|
"eval_steps_per_second": 10.263, |
|
"eval_wer": 0.09926593259926593, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"learning_rate": 5.53949547194521e-06, |
|
"loss": 3.6773, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"eval_loss": 4.239638328552246, |
|
"eval_runtime": 2.3554, |
|
"eval_samples_per_second": 304.413, |
|
"eval_steps_per_second": 9.765, |
|
"eval_wer": 0.09876543209876543, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 69.81, |
|
"learning_rate": 5.247234378047524e-06, |
|
"loss": 3.6364, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 69.81, |
|
"eval_loss": 4.220588207244873, |
|
"eval_runtime": 2.4135, |
|
"eval_samples_per_second": 297.083, |
|
"eval_steps_per_second": 9.53, |
|
"eval_wer": 0.09634634634634634, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"learning_rate": 4.960126877495005e-06, |
|
"loss": 3.6342, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"eval_loss": 4.290452480316162, |
|
"eval_runtime": 2.3553, |
|
"eval_samples_per_second": 304.417, |
|
"eval_steps_per_second": 9.765, |
|
"eval_wer": 0.10176843510176843, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 71.7, |
|
"learning_rate": 4.67848429119466e-06, |
|
"loss": 3.7012, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 71.7, |
|
"eval_loss": 4.308350086212158, |
|
"eval_runtime": 2.3492, |
|
"eval_samples_per_second": 305.208, |
|
"eval_steps_per_second": 9.79, |
|
"eval_wer": 0.09943276609943276, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"learning_rate": 4.402612014251967e-06, |
|
"loss": 3.4846, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"eval_loss": 4.287242412567139, |
|
"eval_runtime": 2.3668, |
|
"eval_samples_per_second": 302.942, |
|
"eval_steps_per_second": 9.718, |
|
"eval_wer": 0.09759759759759759, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 73.58, |
|
"learning_rate": 4.132809184820095e-06, |
|
"loss": 3.4814, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 73.58, |
|
"eval_loss": 4.259552955627441, |
|
"eval_runtime": 2.2432, |
|
"eval_samples_per_second": 319.627, |
|
"eval_steps_per_second": 10.253, |
|
"eval_wer": 0.10026693360026694, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"learning_rate": 3.869368359733711e-06, |
|
"loss": 3.3212, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"eval_loss": 4.226958274841309, |
|
"eval_runtime": 2.3893, |
|
"eval_samples_per_second": 300.093, |
|
"eval_steps_per_second": 9.626, |
|
"eval_wer": 0.09642976309642977, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"learning_rate": 3.6125751972791635e-06, |
|
"loss": 3.6578, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"eval_loss": 4.247718811035156, |
|
"eval_runtime": 2.4075, |
|
"eval_samples_per_second": 297.814, |
|
"eval_steps_per_second": 9.553, |
|
"eval_wer": 0.09776443109776443, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 76.42, |
|
"learning_rate": 3.3627081474450273e-06, |
|
"loss": 3.4573, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 76.42, |
|
"eval_loss": 4.238850116729736, |
|
"eval_runtime": 2.3207, |
|
"eval_samples_per_second": 308.956, |
|
"eval_steps_per_second": 9.911, |
|
"eval_wer": 0.09726393059726393, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 77.36, |
|
"learning_rate": 3.120038149988832e-06, |
|
"loss": 3.5776, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 77.36, |
|
"eval_loss": 4.282679557800293, |
|
"eval_runtime": 2.3987, |
|
"eval_samples_per_second": 298.911, |
|
"eval_steps_per_second": 9.589, |
|
"eval_wer": 0.09893226559893227, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 78.3, |
|
"learning_rate": 2.884828340647414e-06, |
|
"loss": 3.5116, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 78.3, |
|
"eval_loss": 4.324526786804199, |
|
"eval_runtime": 2.3498, |
|
"eval_samples_per_second": 305.135, |
|
"eval_steps_per_second": 9.788, |
|
"eval_wer": 0.10018351685018352, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"learning_rate": 2.657333765809459e-06, |
|
"loss": 3.3334, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"eval_loss": 4.270716667175293, |
|
"eval_runtime": 2.4082, |
|
"eval_samples_per_second": 297.731, |
|
"eval_steps_per_second": 9.551, |
|
"eval_wer": 0.0995995995995996, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 80.19, |
|
"learning_rate": 2.437801105959594e-06, |
|
"loss": 3.4829, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 80.19, |
|
"eval_loss": 4.245628833770752, |
|
"eval_runtime": 2.3251, |
|
"eval_samples_per_second": 308.368, |
|
"eval_steps_per_second": 9.892, |
|
"eval_wer": 0.09818151484818151, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"learning_rate": 2.2264684081939447e-06, |
|
"loss": 3.44, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"eval_loss": 4.284632682800293, |
|
"eval_runtime": 2.3871, |
|
"eval_samples_per_second": 300.371, |
|
"eval_steps_per_second": 9.635, |
|
"eval_wer": 0.10026693360026694, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"learning_rate": 2.023564828097159e-06, |
|
"loss": 3.4112, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"eval_loss": 4.280004501342773, |
|
"eval_runtime": 2.3804, |
|
"eval_samples_per_second": 301.204, |
|
"eval_steps_per_second": 9.662, |
|
"eval_wer": 0.09768101434768102, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"learning_rate": 1.829310381260848e-06, |
|
"loss": 3.3825, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"eval_loss": 4.256912708282471, |
|
"eval_runtime": 2.3009, |
|
"eval_samples_per_second": 311.615, |
|
"eval_steps_per_second": 9.996, |
|
"eval_wer": 0.09759759759759759, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"learning_rate": 1.647535456169591e-06, |
|
"loss": 3.3444, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"eval_loss": 4.2334303855896, |
|
"eval_runtime": 2.4524, |
|
"eval_samples_per_second": 292.361, |
|
"eval_steps_per_second": 9.378, |
|
"eval_wer": 0.09492826159492826, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"learning_rate": 1.4710184570696184e-06, |
|
"loss": 3.5125, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"eval_loss": 4.26320743560791, |
|
"eval_runtime": 2.3986, |
|
"eval_samples_per_second": 298.918, |
|
"eval_steps_per_second": 9.589, |
|
"eval_wer": 0.09784784784784785, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"learning_rate": 1.3037497369753871e-06, |
|
"loss": 3.3393, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"eval_loss": 4.25075626373291, |
|
"eval_runtime": 2.4865, |
|
"eval_samples_per_second": 288.354, |
|
"eval_steps_per_second": 9.25, |
|
"eval_wer": 0.09793126459793126, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 86.79, |
|
"learning_rate": 1.1459106713283286e-06, |
|
"loss": 3.4698, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 86.79, |
|
"eval_loss": 4.248310089111328, |
|
"eval_runtime": 2.3393, |
|
"eval_samples_per_second": 306.501, |
|
"eval_steps_per_second": 9.832, |
|
"eval_wer": 0.10001668335001668, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"learning_rate": 9.976724106591128e-07, |
|
"loss": 3.3466, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"eval_loss": 4.256034851074219, |
|
"eval_runtime": 2.41, |
|
"eval_samples_per_second": 297.505, |
|
"eval_steps_per_second": 9.543, |
|
"eval_wer": 0.09851518184851518, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 88.68, |
|
"learning_rate": 8.591956950030067e-07, |
|
"loss": 3.3808, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 88.68, |
|
"eval_loss": 4.255034446716309, |
|
"eval_runtime": 2.293, |
|
"eval_samples_per_second": 312.694, |
|
"eval_steps_per_second": 10.031, |
|
"eval_wer": 0.09734734734734735, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"learning_rate": 7.306306796037188e-07, |
|
"loss": 3.3442, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"eval_loss": 4.2573628425598145, |
|
"eval_runtime": 2.3853, |
|
"eval_samples_per_second": 300.593, |
|
"eval_steps_per_second": 9.642, |
|
"eval_wer": 0.09818151484818151, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 90.57, |
|
"learning_rate": 6.121167720947174e-07, |
|
"loss": 3.0359, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 90.57, |
|
"eval_loss": 4.257233142852783, |
|
"eval_runtime": 2.3441, |
|
"eval_samples_per_second": 305.87, |
|
"eval_steps_per_second": 9.812, |
|
"eval_wer": 0.09934934934934934, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"learning_rate": 5.037824813345571e-07, |
|
"loss": 3.5286, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"eval_loss": 4.250905513763428, |
|
"eval_runtime": 2.4654, |
|
"eval_samples_per_second": 290.825, |
|
"eval_steps_per_second": 9.329, |
|
"eval_wer": 0.09934934934934934, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"learning_rate": 4.057452780601334e-07, |
|
"loss": 3.0826, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"eval_loss": 4.240777492523193, |
|
"eval_runtime": 2.4178, |
|
"eval_samples_per_second": 296.545, |
|
"eval_steps_per_second": 9.513, |
|
"eval_wer": 0.09768101434768102, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"learning_rate": 3.1811146750898025e-07, |
|
"loss": 3.513, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"eval_loss": 4.253066539764404, |
|
"eval_runtime": 2.2776, |
|
"eval_samples_per_second": 314.806, |
|
"eval_steps_per_second": 10.098, |
|
"eval_wer": 0.09901568234901569, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 94.34, |
|
"learning_rate": 2.4097607414869995e-07, |
|
"loss": 3.272, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 94.34, |
|
"eval_loss": 4.255825996398926, |
|
"eval_runtime": 2.3617, |
|
"eval_samples_per_second": 303.593, |
|
"eval_steps_per_second": 9.739, |
|
"eval_wer": 0.09951618284951619, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 95.28, |
|
"learning_rate": 1.7442273863854553e-07, |
|
"loss": 3.2433, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 95.28, |
|
"eval_loss": 4.2515153884887695, |
|
"eval_runtime": 2.4174, |
|
"eval_samples_per_second": 296.604, |
|
"eval_steps_per_second": 9.515, |
|
"eval_wer": 0.09918251584918251, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 96.23, |
|
"learning_rate": 1.185236271348722e-07, |
|
"loss": 3.3373, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 96.23, |
|
"eval_loss": 4.252400875091553, |
|
"eval_runtime": 2.3337, |
|
"eval_samples_per_second": 307.237, |
|
"eval_steps_per_second": 9.856, |
|
"eval_wer": 0.1001001001001001, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"learning_rate": 7.33393530387927e-08, |
|
"loss": 3.2239, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"eval_loss": 4.253963947296143, |
|
"eval_runtime": 2.307, |
|
"eval_samples_per_second": 310.799, |
|
"eval_steps_per_second": 9.97, |
|
"eval_wer": 0.09951618284951619, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"learning_rate": 3.8918911270908745e-08, |
|
"loss": 3.4072, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"eval_loss": 4.248571872711182, |
|
"eval_runtime": 2.3602, |
|
"eval_samples_per_second": 303.785, |
|
"eval_steps_per_second": 9.745, |
|
"eval_wer": 0.09934934934934934, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 99.06, |
|
"learning_rate": 1.5665974539441632e-08, |
|
"loss": 3.3015, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 99.06, |
|
"eval_loss": 4.249679088592529, |
|
"eval_runtime": 2.4076, |
|
"eval_samples_per_second": 297.807, |
|
"eval_steps_per_second": 9.553, |
|
"eval_wer": 0.09876543209876543, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.6567448613734612e-09, |
|
"loss": 3.329, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 4.2488484382629395, |
|
"eval_runtime": 2.329, |
|
"eval_samples_per_second": 307.858, |
|
"eval_steps_per_second": 9.876, |
|
"eval_wer": 0.09901568234901569, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 5300, |
|
"total_flos": 8.034472481207091e+16, |
|
"train_loss": 65.34310805986513, |
|
"train_runtime": 4047.7439, |
|
"train_samples_per_second": 83.602, |
|
"train_steps_per_second": 1.309 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5300, |
|
"num_train_epochs": 100, |
|
"save_steps": 50, |
|
"total_flos": 8.034472481207091e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |