|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 758, |
|
"global_step": 11370, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6596306068601583, |
|
"grad_norm": 1.0755512714385986, |
|
"learning_rate": 0.0002982, |
|
"loss": 3.7968, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.2848176658153534, |
|
"eval_runtime": 177.1073, |
|
"eval_samples_per_second": 64.565, |
|
"eval_steps_per_second": 4.037, |
|
"eval_wer": 0.5295409253093712, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.3192612137203166, |
|
"grad_norm": 0.398578405380249, |
|
"learning_rate": 0.00028628334866605336, |
|
"loss": 0.3537, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.978891820580475, |
|
"grad_norm": 0.6923481822013855, |
|
"learning_rate": 0.00027248390064397424, |
|
"loss": 0.2547, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.19083459675312042, |
|
"eval_runtime": 177.247, |
|
"eval_samples_per_second": 64.514, |
|
"eval_steps_per_second": 4.034, |
|
"eval_wer": 0.42224847551880096, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 2.638522427440633, |
|
"grad_norm": 0.2965054214000702, |
|
"learning_rate": 0.0002586844526218951, |
|
"loss": 0.1929, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.1753465086221695, |
|
"eval_runtime": 183.1101, |
|
"eval_samples_per_second": 62.449, |
|
"eval_steps_per_second": 3.905, |
|
"eval_wer": 0.39995704006014393, |
|
"step": 2274 |
|
}, |
|
{ |
|
"epoch": 3.2981530343007917, |
|
"grad_norm": 0.22291332483291626, |
|
"learning_rate": 0.000244885004599816, |
|
"loss": 0.172, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.9577836411609497, |
|
"grad_norm": 0.21266356110572815, |
|
"learning_rate": 0.00023108555657773688, |
|
"loss": 0.1532, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.15584248304367065, |
|
"eval_runtime": 179.8085, |
|
"eval_samples_per_second": 63.595, |
|
"eval_steps_per_second": 3.976, |
|
"eval_wer": 0.37095908065728705, |
|
"step": 3032 |
|
}, |
|
{ |
|
"epoch": 4.617414248021108, |
|
"grad_norm": 0.3851657509803772, |
|
"learning_rate": 0.00021728610855565776, |
|
"loss": 0.1297, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.15116363763809204, |
|
"eval_runtime": 178.6057, |
|
"eval_samples_per_second": 64.024, |
|
"eval_steps_per_second": 4.003, |
|
"eval_wer": 0.35356030501557295, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 5.277044854881266, |
|
"grad_norm": 0.6844151616096497, |
|
"learning_rate": 0.00020348666053357864, |
|
"loss": 0.1236, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.936675461741425, |
|
"grad_norm": 0.2345355600118637, |
|
"learning_rate": 0.00018968721251149952, |
|
"loss": 0.1167, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.15742355585098267, |
|
"eval_runtime": 183.8285, |
|
"eval_samples_per_second": 62.205, |
|
"eval_steps_per_second": 3.889, |
|
"eval_wer": 0.35143617465602217, |
|
"step": 4548 |
|
}, |
|
{ |
|
"epoch": 6.596306068601583, |
|
"grad_norm": 0.18163040280342102, |
|
"learning_rate": 0.00017588776448942042, |
|
"loss": 0.101, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.1482517421245575, |
|
"eval_runtime": 181.3176, |
|
"eval_samples_per_second": 63.066, |
|
"eval_steps_per_second": 3.943, |
|
"eval_wer": 0.3374383942529147, |
|
"step": 5306 |
|
}, |
|
{ |
|
"epoch": 7.255936675461742, |
|
"grad_norm": 0.3131236135959625, |
|
"learning_rate": 0.0001620883164673413, |
|
"loss": 0.0945, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.915567282321899, |
|
"grad_norm": 0.5193817615509033, |
|
"learning_rate": 0.00014828886844526218, |
|
"loss": 0.0859, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.14896264672279358, |
|
"eval_runtime": 180.9851, |
|
"eval_samples_per_second": 63.182, |
|
"eval_steps_per_second": 3.951, |
|
"eval_wer": 0.32986073819496653, |
|
"step": 6064 |
|
}, |
|
{ |
|
"epoch": 8.575197889182059, |
|
"grad_norm": 0.21172136068344116, |
|
"learning_rate": 0.00013448942042318306, |
|
"loss": 0.0791, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.15233299136161804, |
|
"eval_runtime": 179.7038, |
|
"eval_samples_per_second": 63.632, |
|
"eval_steps_per_second": 3.979, |
|
"eval_wer": 0.32503967827778374, |
|
"step": 6822 |
|
}, |
|
{ |
|
"epoch": 9.234828496042216, |
|
"grad_norm": 0.20351053774356842, |
|
"learning_rate": 0.0001207175712971481, |
|
"loss": 0.0745, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.894459102902374, |
|
"grad_norm": 0.1984056681394577, |
|
"learning_rate": 0.00010694572217111315, |
|
"loss": 0.0702, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.16084744036197662, |
|
"eval_runtime": 179.8551, |
|
"eval_samples_per_second": 63.579, |
|
"eval_steps_per_second": 3.975, |
|
"eval_wer": 0.3191923531307056, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 10.554089709762533, |
|
"grad_norm": 0.9130335450172424, |
|
"learning_rate": 9.314627414903402e-05, |
|
"loss": 0.0629, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.16640283167362213, |
|
"eval_runtime": 180.2008, |
|
"eval_samples_per_second": 63.457, |
|
"eval_steps_per_second": 3.968, |
|
"eval_wer": 0.31460995954605664, |
|
"step": 8338 |
|
}, |
|
{ |
|
"epoch": 11.213720316622691, |
|
"grad_norm": 0.24980618059635162, |
|
"learning_rate": 7.934682612695491e-05, |
|
"loss": 0.0587, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 11.87335092348285, |
|
"grad_norm": 0.2797747850418091, |
|
"learning_rate": 6.55473781048758e-05, |
|
"loss": 0.0559, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.16405943036079407, |
|
"eval_runtime": 182.307, |
|
"eval_samples_per_second": 62.724, |
|
"eval_steps_per_second": 3.922, |
|
"eval_wer": 0.3103020322438215, |
|
"step": 9096 |
|
}, |
|
{ |
|
"epoch": 12.532981530343008, |
|
"grad_norm": 0.15323172509670258, |
|
"learning_rate": 5.1775528978840844e-05, |
|
"loss": 0.0527, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.16645778715610504, |
|
"eval_runtime": 180.7018, |
|
"eval_samples_per_second": 63.281, |
|
"eval_steps_per_second": 3.957, |
|
"eval_wer": 0.3063043711738804, |
|
"step": 9854 |
|
}, |
|
{ |
|
"epoch": 13.192612137203167, |
|
"grad_norm": 0.7859821319580078, |
|
"learning_rate": 3.797608095676173e-05, |
|
"loss": 0.0491, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 13.852242744063325, |
|
"grad_norm": 0.2534639537334442, |
|
"learning_rate": 2.4176632934682608e-05, |
|
"loss": 0.0468, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.16911591589450836, |
|
"eval_runtime": 181.5239, |
|
"eval_samples_per_second": 62.994, |
|
"eval_steps_per_second": 3.939, |
|
"eval_wer": 0.30112531175789686, |
|
"step": 10612 |
|
}, |
|
{ |
|
"epoch": 14.511873350923484, |
|
"grad_norm": 0.21820344030857086, |
|
"learning_rate": 1.0377184912603496e-05, |
|
"loss": 0.0443, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.17480023205280304, |
|
"eval_runtime": 182.6284, |
|
"eval_samples_per_second": 62.613, |
|
"eval_steps_per_second": 3.915, |
|
"eval_wer": 0.2998007136123343, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 11370, |
|
"total_flos": 1.156593069425741e+20, |
|
"train_loss": 0.27266296556149017, |
|
"train_runtime": 21246.5656, |
|
"train_samples_per_second": 34.234, |
|
"train_steps_per_second": 0.535 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 11370, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 758, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.156593069425741e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|