|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 99.98823529411764, |
|
"global_step": 6300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 11.1133, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.900000000000002e-06, |
|
"loss": 3.3967, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.49e-05, |
|
"loss": 3.2205, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.9900000000000003e-05, |
|
"loss": 2.8143, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.4900000000000002e-05, |
|
"loss": 1.9249, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.9900000000000002e-05, |
|
"loss": 1.6708, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 3.49e-05, |
|
"loss": 1.5501, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 3.99e-05, |
|
"loss": 1.4258, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 4.49e-05, |
|
"loss": 1.332, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 4.99e-05, |
|
"loss": 1.2815, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"eval_cer": 0.10093122852447588, |
|
"eval_loss": 0.35359087586402893, |
|
"eval_runtime": 257.0651, |
|
"eval_samples_per_second": 16.852, |
|
"eval_steps_per_second": 2.108, |
|
"eval_wer": 0.47525724236188066, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 4.957758620689655e-05, |
|
"loss": 1.2632, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 4.9146551724137934e-05, |
|
"loss": 1.2239, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 4.871551724137931e-05, |
|
"loss": 1.2044, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 4.828448275862069e-05, |
|
"loss": 1.1918, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"learning_rate": 4.785344827586207e-05, |
|
"loss": 1.1641, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 4.742241379310345e-05, |
|
"loss": 1.1718, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 4.699137931034483e-05, |
|
"loss": 1.1638, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 4.656034482758621e-05, |
|
"loss": 1.1317, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 4.612931034482759e-05, |
|
"loss": 1.1334, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 4.569827586206897e-05, |
|
"loss": 1.0869, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"eval_cer": 0.06135152631841044, |
|
"eval_loss": 0.23165984451770782, |
|
"eval_runtime": 262.4285, |
|
"eval_samples_per_second": 16.507, |
|
"eval_steps_per_second": 2.065, |
|
"eval_wer": 0.3110653791356657, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 4.526724137931035e-05, |
|
"loss": 1.104, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 4.4836206896551726e-05, |
|
"loss": 1.109, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"learning_rate": 4.440517241379311e-05, |
|
"loss": 1.0902, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 4.397413793103449e-05, |
|
"loss": 1.0676, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"learning_rate": 4.3543103448275865e-05, |
|
"loss": 1.0453, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 20.63, |
|
"learning_rate": 4.311206896551725e-05, |
|
"loss": 1.0489, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 21.42, |
|
"learning_rate": 4.268103448275862e-05, |
|
"loss": 1.0495, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 4.2250000000000004e-05, |
|
"loss": 1.0325, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 4.181896551724138e-05, |
|
"loss": 1.0298, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 4.138793103448276e-05, |
|
"loss": 0.9984, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"eval_cer": 0.052054180568696776, |
|
"eval_loss": 0.20215292274951935, |
|
"eval_runtime": 259.0562, |
|
"eval_samples_per_second": 16.722, |
|
"eval_steps_per_second": 2.092, |
|
"eval_wer": 0.26762703815102107, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 4.0956896551724136e-05, |
|
"loss": 1.0118, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"learning_rate": 4.053448275862069e-05, |
|
"loss": 1.0165, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 26.19, |
|
"learning_rate": 4.0103448275862074e-05, |
|
"loss": 1.0075, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"learning_rate": 3.967241379310345e-05, |
|
"loss": 1.003, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 27.77, |
|
"learning_rate": 3.924137931034483e-05, |
|
"loss": 0.9905, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 28.56, |
|
"learning_rate": 3.8810344827586206e-05, |
|
"loss": 1.0019, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 29.36, |
|
"learning_rate": 3.837931034482759e-05, |
|
"loss": 1.0085, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 3.794827586206896e-05, |
|
"loss": 0.9868, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 30.94, |
|
"learning_rate": 3.7517241379310345e-05, |
|
"loss": 0.9816, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 31.74, |
|
"learning_rate": 3.708620689655173e-05, |
|
"loss": 0.975, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 31.74, |
|
"eval_cer": 0.04868781435187491, |
|
"eval_loss": 0.19483695924282074, |
|
"eval_runtime": 259.0795, |
|
"eval_samples_per_second": 16.721, |
|
"eval_steps_per_second": 2.092, |
|
"eval_wer": 0.24688934620864333, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.53, |
|
"learning_rate": 3.66551724137931e-05, |
|
"loss": 0.9552, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 3.6224137931034484e-05, |
|
"loss": 0.9649, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"learning_rate": 3.5793103448275866e-05, |
|
"loss": 0.9632, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"learning_rate": 3.536206896551724e-05, |
|
"loss": 0.9542, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"learning_rate": 3.493103448275862e-05, |
|
"loss": 0.9686, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 36.5, |
|
"learning_rate": 3.45e-05, |
|
"loss": 0.9418, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 37.3, |
|
"learning_rate": 3.406896551724138e-05, |
|
"loss": 0.9295, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 38.09, |
|
"learning_rate": 3.363793103448276e-05, |
|
"loss": 0.9372, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 38.88, |
|
"learning_rate": 3.320689655172414e-05, |
|
"loss": 0.9205, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 39.67, |
|
"learning_rate": 3.277586206896552e-05, |
|
"loss": 0.9306, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 39.67, |
|
"eval_cer": 0.046377172451571136, |
|
"eval_loss": 0.19161736965179443, |
|
"eval_runtime": 258.3157, |
|
"eval_samples_per_second": 16.77, |
|
"eval_steps_per_second": 2.098, |
|
"eval_wer": 0.2377394332752889, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 40.47, |
|
"learning_rate": 3.23448275862069e-05, |
|
"loss": 0.9331, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 41.27, |
|
"learning_rate": 3.1913793103448276e-05, |
|
"loss": 0.8936, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 42.06, |
|
"learning_rate": 3.148275862068966e-05, |
|
"loss": 0.8987, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 42.85, |
|
"learning_rate": 3.105172413793104e-05, |
|
"loss": 0.8853, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 43.64, |
|
"learning_rate": 3.0620689655172415e-05, |
|
"loss": 0.9106, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 44.44, |
|
"learning_rate": 3.0189655172413794e-05, |
|
"loss": 0.8932, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"learning_rate": 2.9758620689655176e-05, |
|
"loss": 0.9096, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 46.03, |
|
"learning_rate": 2.932758620689655e-05, |
|
"loss": 0.8919, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 46.82, |
|
"learning_rate": 2.8896551724137933e-05, |
|
"loss": 0.8744, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 47.61, |
|
"learning_rate": 2.8465517241379315e-05, |
|
"loss": 0.8868, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 47.61, |
|
"eval_cer": 0.04391713560081669, |
|
"eval_loss": 0.19031885266304016, |
|
"eval_runtime": 265.4438, |
|
"eval_samples_per_second": 16.32, |
|
"eval_steps_per_second": 2.042, |
|
"eval_wer": 0.2257400664872566, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 48.41, |
|
"learning_rate": 2.803448275862069e-05, |
|
"loss": 0.8793, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 49.2, |
|
"learning_rate": 2.7603448275862072e-05, |
|
"loss": 0.8739, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"learning_rate": 2.717241379310345e-05, |
|
"loss": 0.8696, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 50.78, |
|
"learning_rate": 2.674137931034483e-05, |
|
"loss": 0.863, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 51.58, |
|
"learning_rate": 2.6310344827586207e-05, |
|
"loss": 0.8612, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 52.38, |
|
"learning_rate": 2.587931034482759e-05, |
|
"loss": 0.8639, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 53.17, |
|
"learning_rate": 2.5448275862068964e-05, |
|
"loss": 0.8523, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 53.96, |
|
"learning_rate": 2.5017241379310346e-05, |
|
"loss": 0.8577, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 54.75, |
|
"learning_rate": 2.4586206896551725e-05, |
|
"loss": 0.8465, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 55.55, |
|
"learning_rate": 2.4155172413793103e-05, |
|
"loss": 0.8424, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 55.55, |
|
"eval_cer": 0.042293710472586024, |
|
"eval_loss": 0.17861121892929077, |
|
"eval_runtime": 259.0288, |
|
"eval_samples_per_second": 16.724, |
|
"eval_steps_per_second": 2.092, |
|
"eval_wer": 0.22061104954883648, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 56.35, |
|
"learning_rate": 2.3724137931034485e-05, |
|
"loss": 0.8436, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"learning_rate": 2.3293103448275864e-05, |
|
"loss": 0.8404, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 57.93, |
|
"learning_rate": 2.2862068965517242e-05, |
|
"loss": 0.8304, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 58.72, |
|
"learning_rate": 2.2431034482758624e-05, |
|
"loss": 0.8331, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 59.52, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.824, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 60.31, |
|
"learning_rate": 2.1568965517241378e-05, |
|
"loss": 0.8328, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 61.11, |
|
"learning_rate": 2.113793103448276e-05, |
|
"loss": 0.8234, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 61.89, |
|
"learning_rate": 2.070689655172414e-05, |
|
"loss": 0.8098, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 62.69, |
|
"learning_rate": 2.0275862068965517e-05, |
|
"loss": 0.8287, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 63.49, |
|
"learning_rate": 1.98448275862069e-05, |
|
"loss": 0.8126, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 63.49, |
|
"eval_cer": 0.04164135252228475, |
|
"eval_loss": 0.18486249446868896, |
|
"eval_runtime": 261.7127, |
|
"eval_samples_per_second": 16.553, |
|
"eval_steps_per_second": 2.071, |
|
"eval_wer": 0.2159886021845813, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 64.28, |
|
"learning_rate": 1.9413793103448277e-05, |
|
"loss": 0.8089, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 65.08, |
|
"learning_rate": 1.8982758620689656e-05, |
|
"loss": 0.8126, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 65.86, |
|
"learning_rate": 1.8551724137931034e-05, |
|
"loss": 0.7975, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 66.66, |
|
"learning_rate": 1.8120689655172416e-05, |
|
"loss": 0.8049, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 67.45, |
|
"learning_rate": 1.7698275862068966e-05, |
|
"loss": 0.8088, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 68.25, |
|
"learning_rate": 1.7267241379310344e-05, |
|
"loss": 0.8038, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 69.05, |
|
"learning_rate": 1.6836206896551726e-05, |
|
"loss": 0.7886, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 69.83, |
|
"learning_rate": 1.6405172413793105e-05, |
|
"loss": 0.7735, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 70.63, |
|
"learning_rate": 1.5974137931034483e-05, |
|
"loss": 0.7837, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 71.42, |
|
"learning_rate": 1.5543103448275865e-05, |
|
"loss": 0.7901, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 71.42, |
|
"eval_cer": 0.04126786514615806, |
|
"eval_loss": 0.18691900372505188, |
|
"eval_runtime": 261.5464, |
|
"eval_samples_per_second": 16.563, |
|
"eval_steps_per_second": 2.072, |
|
"eval_wer": 0.21383568149438023, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 72.22, |
|
"learning_rate": 1.5112068965517242e-05, |
|
"loss": 0.7949, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 73.02, |
|
"learning_rate": 1.468103448275862e-05, |
|
"loss": 0.7893, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 73.8, |
|
"learning_rate": 1.4249999999999999e-05, |
|
"loss": 0.7603, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 74.6, |
|
"learning_rate": 1.3818965517241381e-05, |
|
"loss": 0.776, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 75.39, |
|
"learning_rate": 1.338793103448276e-05, |
|
"loss": 0.7755, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 76.19, |
|
"learning_rate": 1.2956896551724138e-05, |
|
"loss": 0.7751, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 76.97, |
|
"learning_rate": 1.2525862068965518e-05, |
|
"loss": 0.7608, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 77.77, |
|
"learning_rate": 1.2094827586206897e-05, |
|
"loss": 0.7663, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 78.56, |
|
"learning_rate": 1.1663793103448277e-05, |
|
"loss": 0.7656, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 79.36, |
|
"learning_rate": 1.1232758620689656e-05, |
|
"loss": 0.7671, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 79.36, |
|
"eval_cer": 0.03937054927543449, |
|
"eval_loss": 0.18550464510917664, |
|
"eval_runtime": 260.3539, |
|
"eval_samples_per_second": 16.639, |
|
"eval_steps_per_second": 2.082, |
|
"eval_wer": 0.20747190121893302, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 80.16, |
|
"learning_rate": 1.0801724137931036e-05, |
|
"loss": 0.7694, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 80.94, |
|
"learning_rate": 1.0370689655172414e-05, |
|
"loss": 0.7672, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 81.74, |
|
"learning_rate": 9.939655172413793e-06, |
|
"loss": 0.7444, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 82.53, |
|
"learning_rate": 9.508620689655173e-06, |
|
"loss": 0.7534, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 9.077586206896552e-06, |
|
"loss": 0.7453, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 84.13, |
|
"learning_rate": 8.646551724137932e-06, |
|
"loss": 0.7494, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"learning_rate": 8.224137931034483e-06, |
|
"loss": 0.7425, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 85.71, |
|
"learning_rate": 7.793103448275863e-06, |
|
"loss": 0.7499, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 86.5, |
|
"learning_rate": 7.370689655172413e-06, |
|
"loss": 0.735, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 87.3, |
|
"learning_rate": 6.939655172413794e-06, |
|
"loss": 0.7467, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 87.3, |
|
"eval_cer": 0.03894228375080922, |
|
"eval_loss": 0.18841499090194702, |
|
"eval_runtime": 261.43, |
|
"eval_samples_per_second": 16.57, |
|
"eval_steps_per_second": 2.073, |
|
"eval_wer": 0.20490739274972297, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 88.09, |
|
"learning_rate": 6.508620689655173e-06, |
|
"loss": 0.7348, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 88.88, |
|
"learning_rate": 6.0775862068965515e-06, |
|
"loss": 0.7244, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 89.67, |
|
"learning_rate": 5.646551724137932e-06, |
|
"loss": 0.7394, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 90.47, |
|
"learning_rate": 5.21551724137931e-06, |
|
"loss": 0.7423, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 91.27, |
|
"learning_rate": 4.78448275862069e-06, |
|
"loss": 0.7251, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 92.06, |
|
"learning_rate": 4.353448275862069e-06, |
|
"loss": 0.7304, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 92.85, |
|
"learning_rate": 3.9224137931034484e-06, |
|
"loss": 0.7153, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 93.64, |
|
"learning_rate": 3.491379310344828e-06, |
|
"loss": 0.7287, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 94.44, |
|
"learning_rate": 3.0603448275862068e-06, |
|
"loss": 0.7349, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"learning_rate": 2.6293103448275866e-06, |
|
"loss": 0.731, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"eval_cer": 0.03871819132513321, |
|
"eval_loss": 0.1877404898405075, |
|
"eval_runtime": 259.3367, |
|
"eval_samples_per_second": 16.704, |
|
"eval_steps_per_second": 2.09, |
|
"eval_wer": 0.2059838530948235, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 96.03, |
|
"learning_rate": 2.1982758620689655e-06, |
|
"loss": 0.7151, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 96.82, |
|
"learning_rate": 1.7672413793103449e-06, |
|
"loss": 0.713, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 97.61, |
|
"learning_rate": 1.3362068965517243e-06, |
|
"loss": 0.7257, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 98.41, |
|
"learning_rate": 9.051724137931035e-07, |
|
"loss": 0.7287, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"learning_rate": 4.7413793103448276e-07, |
|
"loss": 0.7273, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"learning_rate": 4.310344827586207e-08, |
|
"loss": 0.7082, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"step": 6300, |
|
"total_flos": 4.0887685530877926e+20, |
|
"train_loss": 1.049089940994505, |
|
"train_runtime": 95054.1856, |
|
"train_samples_per_second": 10.723, |
|
"train_steps_per_second": 0.066 |
|
} |
|
], |
|
"max_steps": 6300, |
|
"num_train_epochs": 100, |
|
"total_flos": 4.0887685530877926e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|