{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 49.95402298850575,
  "global_step": 8692,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.57,
      "learning_rate": 9.123417721518986e-06,
      "loss": 14.6554,
      "step": 100
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.766772151898734e-05,
      "loss": 6.292,
      "step": 200
    },
    {
      "epoch": 1.72,
      "learning_rate": 2.6212025316455694e-05,
      "loss": 4.5602,
      "step": 300
    },
    {
      "epoch": 2.3,
      "learning_rate": 3.475632911392405e-05,
      "loss": 3.675,
      "step": 400
    },
    {
      "epoch": 2.3,
      "eval_cer": 1.0,
      "eval_loss": 3.505185604095459,
      "eval_runtime": 388.8147,
      "eval_samples_per_second": 19.927,
      "eval_steps_per_second": 0.314,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 2.87,
      "learning_rate": 4.3300632911392406e-05,
      "loss": 3.4053,
      "step": 500
    },
    {
      "epoch": 3.45,
      "learning_rate": 5.184493670886076e-05,
      "loss": 3.3269,
      "step": 600
    },
    {
      "epoch": 4.02,
      "learning_rate": 6.038924050632911e-05,
      "loss": 3.2702,
      "step": 700
    },
    {
      "epoch": 4.6,
      "learning_rate": 6.893354430379747e-05,
      "loss": 3.0446,
      "step": 800
    },
    {
      "epoch": 4.6,
      "eval_cer": 0.52146416703672,
      "eval_loss": 2.2759008407592773,
      "eval_runtime": 391.3771,
      "eval_samples_per_second": 19.797,
      "eval_steps_per_second": 0.312,
      "eval_wer": 1.0052476497825171,
      "step": 800
    },
    {
      "epoch": 5.17,
      "learning_rate": 7.5e-05,
      "loss": 2.3045,
      "step": 900
    },
    {
      "epoch": 5.75,
      "learning_rate": 7.5e-05,
      "loss": 1.9583,
      "step": 1000
    },
    {
      "epoch": 6.32,
      "learning_rate": 7.5e-05,
      "loss": 1.8142,
      "step": 1100
    },
    {
      "epoch": 6.9,
      "learning_rate": 7.5e-05,
      "loss": 1.7276,
      "step": 1200
    },
    {
      "epoch": 6.9,
      "eval_cer": 0.19686409281152592,
      "eval_loss": 0.7083391547203064,
      "eval_runtime": 386.6895,
      "eval_samples_per_second": 20.037,
      "eval_steps_per_second": 0.315,
      "eval_wer": 0.6697067489827417,
      "step": 1200
    },
    {
      "epoch": 7.47,
      "learning_rate": 7.5e-05,
      "loss": 1.6571,
      "step": 1300
    },
    {
      "epoch": 8.05,
      "learning_rate": 7.5e-05,
      "loss": 1.6004,
      "step": 1400
    },
    {
      "epoch": 8.62,
      "learning_rate": 7.5e-05,
      "loss": 1.5572,
      "step": 1500
    },
    {
      "epoch": 9.2,
      "learning_rate": 7.5e-05,
      "loss": 1.5171,
      "step": 1600
    },
    {
      "epoch": 9.2,
      "eval_cer": 0.15683826416017196,
      "eval_loss": 0.5328246355056763,
      "eval_runtime": 388.9054,
      "eval_samples_per_second": 19.923,
      "eval_steps_per_second": 0.314,
      "eval_wer": 0.5733408166128806,
      "step": 1600
    },
    {
      "epoch": 9.77,
      "learning_rate": 7.5e-05,
      "loss": 1.4951,
      "step": 1700
    },
    {
      "epoch": 10.34,
      "learning_rate": 7.5e-05,
      "loss": 1.4553,
      "step": 1800
    },
    {
      "epoch": 10.92,
      "learning_rate": 7.5e-05,
      "loss": 1.4348,
      "step": 1900
    },
    {
      "epoch": 11.49,
      "learning_rate": 7.5e-05,
      "loss": 1.4176,
      "step": 2000
    },
    {
      "epoch": 11.49,
      "eval_cer": 0.13810176574442937,
      "eval_loss": 0.4571371376514435,
      "eval_runtime": 390.2121,
      "eval_samples_per_second": 19.856,
      "eval_steps_per_second": 0.313,
      "eval_wer": 0.5160656657780273,
      "step": 2000
    },
    {
      "epoch": 12.07,
      "learning_rate": 7.5e-05,
      "loss": 1.3988,
      "step": 2100
    },
    {
      "epoch": 12.64,
      "learning_rate": 7.5e-05,
      "loss": 1.3666,
      "step": 2200
    },
    {
      "epoch": 13.22,
      "learning_rate": 7.5e-05,
      "loss": 1.3585,
      "step": 2300
    },
    {
      "epoch": 13.79,
      "learning_rate": 7.5e-05,
      "loss": 1.343,
      "step": 2400
    },
    {
      "epoch": 13.79,
      "eval_cer": 0.11596506696356744,
      "eval_loss": 0.3910418748855591,
      "eval_runtime": 386.6143,
      "eval_samples_per_second": 20.041,
      "eval_steps_per_second": 0.316,
      "eval_wer": 0.45223796828960294,
      "step": 2400
    },
    {
      "epoch": 14.37,
      "learning_rate": 7.5e-05,
      "loss": 1.3196,
      "step": 2500
    },
    {
      "epoch": 14.94,
      "learning_rate": 7.5e-05,
      "loss": 1.3085,
      "step": 2600
    },
    {
      "epoch": 15.52,
      "learning_rate": 7.5e-05,
      "loss": 1.3006,
      "step": 2700
    },
    {
      "epoch": 16.09,
      "learning_rate": 7.5e-05,
      "loss": 1.2743,
      "step": 2800
    },
    {
      "epoch": 16.09,
      "eval_cer": 0.10436362622251684,
      "eval_loss": 0.3533952534198761,
      "eval_runtime": 386.8689,
      "eval_samples_per_second": 20.027,
      "eval_steps_per_second": 0.315,
      "eval_wer": 0.41365230812403536,
      "step": 2800
    },
    {
      "epoch": 16.67,
      "learning_rate": 7.5e-05,
      "loss": 1.256,
      "step": 2900
    },
    {
      "epoch": 17.24,
      "learning_rate": 7.5e-05,
      "loss": 1.2627,
      "step": 3000
    },
    {
      "epoch": 17.82,
      "learning_rate": 7.5e-05,
      "loss": 1.2496,
      "step": 3100
    },
    {
      "epoch": 18.39,
      "learning_rate": 7.5e-05,
      "loss": 1.2396,
      "step": 3200
    },
    {
      "epoch": 18.39,
      "eval_cer": 0.09588994077001131,
      "eval_loss": 0.3277980387210846,
      "eval_runtime": 388.8491,
      "eval_samples_per_second": 19.925,
      "eval_steps_per_second": 0.314,
      "eval_wer": 0.38773677564192505,
      "step": 3200
    },
    {
      "epoch": 18.97,
      "learning_rate": 7.5e-05,
      "loss": 1.2291,
      "step": 3300
    },
    {
      "epoch": 19.54,
      "learning_rate": 7.5e-05,
      "loss": 1.2254,
      "step": 3400
    },
    {
      "epoch": 20.11,
      "learning_rate": 7.5e-05,
      "loss": 1.2063,
      "step": 3500
    },
    {
      "epoch": 20.69,
      "learning_rate": 7.5e-05,
      "loss": 1.2035,
      "step": 3600
    },
    {
      "epoch": 20.69,
      "eval_cer": 0.09172603609575479,
      "eval_loss": 0.31091800332069397,
      "eval_runtime": 384.3384,
      "eval_samples_per_second": 20.159,
      "eval_steps_per_second": 0.317,
      "eval_wer": 0.3740844675178897,
      "step": 3600
    },
    {
      "epoch": 21.26,
      "learning_rate": 7.5e-05,
      "loss": 1.1937,
      "step": 3700
    },
    {
      "epoch": 21.84,
      "learning_rate": 7.5e-05,
      "loss": 1.1787,
      "step": 3800
    },
    {
      "epoch": 22.41,
      "learning_rate": 7.5e-05,
      "loss": 1.1779,
      "step": 3900
    },
    {
      "epoch": 22.99,
      "learning_rate": 7.5e-05,
      "loss": 1.1745,
      "step": 4000
    },
    {
      "epoch": 22.99,
      "eval_cer": 0.08818425010028982,
      "eval_loss": 0.2971595227718353,
      "eval_runtime": 385.6559,
      "eval_samples_per_second": 20.09,
      "eval_steps_per_second": 0.316,
      "eval_wer": 0.3618212431598148,
      "step": 4000
    },
    {
      "epoch": 23.56,
      "learning_rate": 7.5e-05,
      "loss": 1.1697,
      "step": 4100
    },
    {
      "epoch": 24.14,
      "learning_rate": 7.5e-05,
      "loss": 1.1506,
      "step": 4200
    },
    {
      "epoch": 24.71,
      "learning_rate": 7.5e-05,
      "loss": 1.1512,
      "step": 4300
    },
    {
      "epoch": 25.29,
      "learning_rate": 7.414768806073154e-05,
      "loss": 1.1541,
      "step": 4400
    },
    {
      "epoch": 25.29,
      "eval_cer": 0.08323089827501506,
      "eval_loss": 0.28362834453582764,
      "eval_runtime": 385.7883,
      "eval_samples_per_second": 20.084,
      "eval_steps_per_second": 0.316,
      "eval_wer": 0.3427108180159955,
      "step": 4400
    },
    {
      "epoch": 25.86,
      "learning_rate": 7.250862663906142e-05,
      "loss": 1.1464,
      "step": 4500
    },
    {
      "epoch": 26.44,
      "learning_rate": 7.08695652173913e-05,
      "loss": 1.1395,
      "step": 4600
    },
    {
      "epoch": 27.01,
      "learning_rate": 6.923050379572118e-05,
      "loss": 1.1432,
      "step": 4700
    },
    {
      "epoch": 27.59,
      "learning_rate": 6.759144237405107e-05,
      "loss": 1.1372,
      "step": 4800
    },
    {
      "epoch": 27.59,
      "eval_cer": 0.08124869945017581,
      "eval_loss": 0.2759494483470917,
      "eval_runtime": 383.6861,
      "eval_samples_per_second": 20.194,
      "eval_steps_per_second": 0.318,
      "eval_wer": 0.3357373368878911,
      "step": 4800
    },
    {
      "epoch": 28.16,
      "learning_rate": 6.595238095238095e-05,
      "loss": 1.1179,
      "step": 4900
    },
    {
      "epoch": 28.74,
      "learning_rate": 6.431331953071083e-05,
      "loss": 1.1221,
      "step": 5000
    },
    {
      "epoch": 29.31,
      "learning_rate": 6.267425810904071e-05,
      "loss": 1.116,
      "step": 5100
    },
    {
      "epoch": 29.89,
      "learning_rate": 6.103519668737059e-05,
      "loss": 1.1048,
      "step": 5200
    },
    {
      "epoch": 29.89,
      "eval_cer": 0.07828827263385742,
      "eval_loss": 0.2669001519680023,
      "eval_runtime": 386.0726,
      "eval_samples_per_second": 20.069,
      "eval_steps_per_second": 0.316,
      "eval_wer": 0.32842710818015997,
      "step": 5200
    },
    {
      "epoch": 30.46,
      "learning_rate": 5.939613526570048e-05,
      "loss": 1.1005,
      "step": 5300
    },
    {
      "epoch": 31.03,
      "learning_rate": 5.775707384403037e-05,
      "loss": 1.1056,
      "step": 5400
    },
    {
      "epoch": 31.61,
      "learning_rate": 5.6118012422360246e-05,
      "loss": 1.093,
      "step": 5500
    },
    {
      "epoch": 32.18,
      "learning_rate": 5.4478951000690126e-05,
      "loss": 1.0966,
      "step": 5600
    },
    {
      "epoch": 32.18,
      "eval_cer": 0.07754173021930756,
      "eval_loss": 0.2677817642688751,
      "eval_runtime": 386.0716,
      "eval_samples_per_second": 20.069,
      "eval_steps_per_second": 0.316,
      "eval_wer": 0.32491932089238107,
      "step": 5600
    },
    {
      "epoch": 32.76,
      "learning_rate": 5.283988957902001e-05,
      "loss": 1.0884,
      "step": 5700
    },
    {
      "epoch": 33.33,
      "learning_rate": 5.120082815734989e-05,
      "loss": 1.0878,
      "step": 5800
    },
    {
      "epoch": 33.91,
      "learning_rate": 4.956176673567977e-05,
      "loss": 1.0803,
      "step": 5900
    },
    {
      "epoch": 34.48,
      "learning_rate": 4.792270531400966e-05,
      "loss": 1.0747,
      "step": 6000
    },
    {
      "epoch": 34.48,
      "eval_cer": 0.07477866519074373,
      "eval_loss": 0.25474071502685547,
      "eval_runtime": 385.0232,
      "eval_samples_per_second": 20.123,
      "eval_steps_per_second": 0.317,
      "eval_wer": 0.31337168514101305,
      "step": 6000
    },
    {
      "epoch": 35.06,
      "learning_rate": 4.628364389233954e-05,
      "loss": 1.0704,
      "step": 6100
    },
    {
      "epoch": 35.63,
      "learning_rate": 4.464458247066942e-05,
      "loss": 1.0707,
      "step": 6200
    },
    {
      "epoch": 36.21,
      "learning_rate": 4.30055210489993e-05,
      "loss": 1.0599,
      "step": 6300
    },
    {
      "epoch": 36.78,
      "learning_rate": 4.136645962732919e-05,
      "loss": 1.0593,
      "step": 6400
    },
    {
      "epoch": 36.78,
      "eval_cer": 0.07282435444460891,
      "eval_loss": 0.2490725815296173,
      "eval_runtime": 384.9593,
      "eval_samples_per_second": 20.127,
      "eval_steps_per_second": 0.317,
      "eval_wer": 0.3077311631822646,
      "step": 6400
    },
    {
      "epoch": 37.36,
      "learning_rate": 3.972739820565908e-05,
      "loss": 1.0572,
      "step": 6500
    },
    {
      "epoch": 37.93,
      "learning_rate": 3.808833678398896e-05,
      "loss": 1.0506,
      "step": 6600
    },
    {
      "epoch": 38.51,
      "learning_rate": 3.644927536231883e-05,
      "loss": 1.0478,
      "step": 6700
    },
    {
      "epoch": 39.08,
      "learning_rate": 3.4810213940648726e-05,
      "loss": 1.0417,
      "step": 6800
    },
    {
      "epoch": 39.08,
      "eval_cer": 0.07105667930211156,
      "eval_loss": 0.24495387077331543,
      "eval_runtime": 382.2473,
      "eval_samples_per_second": 20.27,
      "eval_steps_per_second": 0.319,
      "eval_wer": 0.301220709976147,
      "step": 6800
    },
    {
      "epoch": 39.66,
      "learning_rate": 3.3171152518978605e-05,
      "loss": 1.0359,
      "step": 6900
    },
    {
      "epoch": 40.23,
      "learning_rate": 3.153209109730849e-05,
      "loss": 1.0389,
      "step": 7000
    },
    {
      "epoch": 40.8,
      "learning_rate": 2.989302967563837e-05,
      "loss": 1.0365,
      "step": 7100
    },
    {
      "epoch": 41.38,
      "learning_rate": 2.8253968253968247e-05,
      "loss": 1.024,
      "step": 7200
    },
    {
      "epoch": 41.38,
      "eval_cer": 0.06938768505349148,
      "eval_loss": 0.24019765853881836,
      "eval_runtime": 381.671,
      "eval_samples_per_second": 20.3,
      "eval_steps_per_second": 0.32,
      "eval_wer": 0.29560825031570087,
      "step": 7200
    },
    {
      "epoch": 41.95,
      "learning_rate": 2.6614906832298136e-05,
      "loss": 1.0288,
      "step": 7300
    },
    {
      "epoch": 42.53,
      "learning_rate": 2.4975845410628016e-05,
      "loss": 1.0283,
      "step": 7400
    },
    {
      "epoch": 43.1,
      "learning_rate": 2.3353174603174605e-05,
      "loss": 1.0153,
      "step": 7500
    },
    {
      "epoch": 43.68,
      "learning_rate": 2.1714113181504485e-05,
      "loss": 1.0106,
      "step": 7600
    },
    {
      "epoch": 43.68,
      "eval_cer": 0.06811555961720395,
      "eval_loss": 0.23507660627365112,
      "eval_runtime": 383.1253,
      "eval_samples_per_second": 20.223,
      "eval_steps_per_second": 0.318,
      "eval_wer": 0.29151115476357514,
      "step": 7600
    },
    {
      "epoch": 44.25,
      "learning_rate": 2.007505175983437e-05,
      "loss": 1.013,
      "step": 7700
    },
    {
      "epoch": 44.83,
      "learning_rate": 1.843599033816425e-05,
      "loss": 1.0067,
      "step": 7800
    },
    {
      "epoch": 45.4,
      "learning_rate": 1.6796928916494137e-05,
      "loss": 1.0076,
      "step": 7900
    },
    {
      "epoch": 45.98,
      "learning_rate": 1.5157867494824016e-05,
      "loss": 1.0014,
      "step": 8000
    },
    {
      "epoch": 45.98,
      "eval_cer": 0.06731109580842178,
      "eval_loss": 0.23282095789909363,
      "eval_runtime": 382.9081,
      "eval_samples_per_second": 20.235,
      "eval_steps_per_second": 0.319,
      "eval_wer": 0.2896309807773257,
      "step": 8000
    },
    {
      "epoch": 46.55,
      "learning_rate": 1.3518806073153895e-05,
      "loss": 0.9955,
      "step": 8100
    },
    {
      "epoch": 47.13,
      "learning_rate": 1.1879744651483783e-05,
      "loss": 0.9998,
      "step": 8200
    },
    {
      "epoch": 47.7,
      "learning_rate": 1.0240683229813662e-05,
      "loss": 0.9956,
      "step": 8300
    },
    {
      "epoch": 48.28,
      "learning_rate": 8.60162180814355e-06,
      "loss": 0.9999,
      "step": 8400
    },
    {
      "epoch": 48.28,
      "eval_cer": 0.06668254141915997,
      "eval_loss": 0.2318294197320938,
      "eval_runtime": 383.814,
      "eval_samples_per_second": 20.187,
      "eval_steps_per_second": 0.318,
      "eval_wer": 0.2865862214115336,
      "step": 8400
    },
    {
      "epoch": 48.85,
      "learning_rate": 6.962560386473429e-06,
      "loss": 0.9898,
      "step": 8500
    },
    {
      "epoch": 49.43,
      "learning_rate": 5.323498964803316e-06,
      "loss": 0.997,
      "step": 8600
    },
    {
      "epoch": 49.95,
      "step": 8692,
      "total_flos": 2.038617513286856e+20,
      "train_loss": 1.5808194972818148,
      "train_runtime": 72373.5093,
      "train_samples_per_second": 15.373,
      "train_steps_per_second": 0.12
    }
  ],
  "max_steps": 8692,
  "num_train_epochs": 50,
  "total_flos": 2.038617513286856e+20,
  "trial_name": null,
  "trial_params": null
}