|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 200, |
|
"global_step": 5010, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3992015968063872, |
|
"eval_loss": 1.943410873413086, |
|
"eval_runtime": 41.2309, |
|
"eval_samples_per_second": 11.52, |
|
"eval_steps_per_second": 1.455, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7984031936127745, |
|
"eval_loss": 0.3648405969142914, |
|
"eval_runtime": 41.3464, |
|
"eval_samples_per_second": 11.488, |
|
"eval_steps_per_second": 1.451, |
|
"eval_wer": 0.5411255411255411, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.998003992015968, |
|
"grad_norm": 0.7569882273674011, |
|
"learning_rate": 0.00027568228105906314, |
|
"loss": 2.5692, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1976047904191618, |
|
"eval_loss": 0.3371403217315674, |
|
"eval_runtime": 40.9227, |
|
"eval_samples_per_second": 11.607, |
|
"eval_steps_per_second": 1.466, |
|
"eval_wer": 0.5174433409727527, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.596806387225549, |
|
"eval_loss": 0.32294145226478577, |
|
"eval_runtime": 41.2686, |
|
"eval_samples_per_second": 11.51, |
|
"eval_steps_per_second": 1.454, |
|
"eval_wer": 0.5212630506748154, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.996007984031936, |
|
"grad_norm": 0.6301750540733337, |
|
"learning_rate": 0.000245132382892057, |
|
"loss": 0.3941, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.996007984031936, |
|
"eval_loss": 0.31834524869918823, |
|
"eval_runtime": 40.8769, |
|
"eval_samples_per_second": 11.62, |
|
"eval_steps_per_second": 1.468, |
|
"eval_wer": 0.49146931499872676, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.3952095808383236, |
|
"eval_loss": 0.3067522943019867, |
|
"eval_runtime": 40.1904, |
|
"eval_samples_per_second": 11.819, |
|
"eval_steps_per_second": 1.493, |
|
"eval_wer": 0.5072574484339191, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.7944111776447107, |
|
"eval_loss": 0.30573877692222595, |
|
"eval_runtime": 40.3076, |
|
"eval_samples_per_second": 11.784, |
|
"eval_steps_per_second": 1.489, |
|
"eval_wer": 0.46880570409982175, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.9940119760479043, |
|
"grad_norm": 0.6246519088745117, |
|
"learning_rate": 0.0002145824847250509, |
|
"loss": 0.3502, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.193612774451098, |
|
"eval_loss": 0.30173251032829285, |
|
"eval_runtime": 40.844, |
|
"eval_samples_per_second": 11.63, |
|
"eval_steps_per_second": 1.469, |
|
"eval_wer": 0.47771836007130125, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.592814371257485, |
|
"eval_loss": 0.2904900014400482, |
|
"eval_runtime": 40.4163, |
|
"eval_samples_per_second": 11.753, |
|
"eval_steps_per_second": 1.485, |
|
"eval_wer": 0.46473134708428826, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.992015968063872, |
|
"grad_norm": 0.9133301973342896, |
|
"learning_rate": 0.00018403258655804477, |
|
"loss": 0.3253, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.992015968063872, |
|
"eval_loss": 0.2856718897819519, |
|
"eval_runtime": 40.3664, |
|
"eval_samples_per_second": 11.767, |
|
"eval_steps_per_second": 1.486, |
|
"eval_wer": 0.4685510567863509, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.391217564870259, |
|
"eval_loss": 0.28921443223953247, |
|
"eval_runtime": 40.4989, |
|
"eval_samples_per_second": 11.729, |
|
"eval_steps_per_second": 1.482, |
|
"eval_wer": 0.4601476954418131, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.790419161676647, |
|
"eval_loss": 0.28484851121902466, |
|
"eval_runtime": 40.2649, |
|
"eval_samples_per_second": 11.797, |
|
"eval_steps_per_second": 1.49, |
|
"eval_wer": 0.47593582887700536, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.99001996007984, |
|
"grad_norm": 0.7654047012329102, |
|
"learning_rate": 0.00015348268839103868, |
|
"loss": 0.3066, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.189620758483034, |
|
"eval_loss": 0.2800922989845276, |
|
"eval_runtime": 40.3508, |
|
"eval_samples_per_second": 11.772, |
|
"eval_steps_per_second": 1.487, |
|
"eval_wer": 0.44435956200662086, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.588822355289421, |
|
"eval_loss": 0.27517372369766235, |
|
"eval_runtime": 40.2103, |
|
"eval_samples_per_second": 11.813, |
|
"eval_steps_per_second": 1.492, |
|
"eval_wer": 0.4626941685765215, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.9880239520958085, |
|
"grad_norm": 0.9405556917190552, |
|
"learning_rate": 0.00012293279022403258, |
|
"loss": 0.2988, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.9880239520958085, |
|
"eval_loss": 0.28181877732276917, |
|
"eval_runtime": 40.5684, |
|
"eval_samples_per_second": 11.709, |
|
"eval_steps_per_second": 1.479, |
|
"eval_wer": 0.4614209320091673, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.387225548902196, |
|
"eval_loss": 0.27585282921791077, |
|
"eval_runtime": 41.0192, |
|
"eval_samples_per_second": 11.58, |
|
"eval_steps_per_second": 1.463, |
|
"eval_wer": 0.44435956200662086, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.786427145708583, |
|
"eval_loss": 0.27509135007858276, |
|
"eval_runtime": 40.7557, |
|
"eval_samples_per_second": 11.655, |
|
"eval_steps_per_second": 1.472, |
|
"eval_wer": 0.4382480264833206, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.986027944111776, |
|
"grad_norm": 0.8755282163619995, |
|
"learning_rate": 9.238289205702647e-05, |
|
"loss": 0.2877, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.18562874251497, |
|
"eval_loss": 0.2725882828235626, |
|
"eval_runtime": 40.5214, |
|
"eval_samples_per_second": 11.722, |
|
"eval_steps_per_second": 1.481, |
|
"eval_wer": 0.4471606824548001, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.584830339321357, |
|
"eval_loss": 0.27224990725517273, |
|
"eval_runtime": 40.7203, |
|
"eval_samples_per_second": 11.665, |
|
"eval_steps_per_second": 1.473, |
|
"eval_wer": 0.4484339190221543, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 7.984031936127744, |
|
"grad_norm": 0.6233875155448914, |
|
"learning_rate": 6.183299389002036e-05, |
|
"loss": 0.2812, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.984031936127744, |
|
"eval_loss": 0.2709678113460541, |
|
"eval_runtime": 40.5287, |
|
"eval_samples_per_second": 11.72, |
|
"eval_steps_per_second": 1.48, |
|
"eval_wer": 0.434428316781258, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.383233532934131, |
|
"eval_loss": 0.2734103798866272, |
|
"eval_runtime": 40.5, |
|
"eval_samples_per_second": 11.728, |
|
"eval_steps_per_second": 1.481, |
|
"eval_wer": 0.4410491469314999, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 8.782435129740518, |
|
"eval_loss": 0.27336403727531433, |
|
"eval_runtime": 40.7879, |
|
"eval_samples_per_second": 11.646, |
|
"eval_steps_per_second": 1.471, |
|
"eval_wer": 0.435956200662083, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 8.982035928143713, |
|
"grad_norm": 0.49290069937705994, |
|
"learning_rate": 3.128309572301426e-05, |
|
"loss": 0.2742, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.181636726546905, |
|
"eval_loss": 0.2758755087852478, |
|
"eval_runtime": 40.5476, |
|
"eval_samples_per_second": 11.715, |
|
"eval_steps_per_second": 1.48, |
|
"eval_wer": 0.43977591036414565, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 9.580838323353294, |
|
"eval_loss": 0.27402016520500183, |
|
"eval_runtime": 40.6331, |
|
"eval_samples_per_second": 11.69, |
|
"eval_steps_per_second": 1.477, |
|
"eval_wer": 0.4336643748408454, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 9.980039920159681, |
|
"grad_norm": 0.8148034811019897, |
|
"learning_rate": 7.331975560081466e-07, |
|
"loss": 0.2731, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 9.980039920159681, |
|
"eval_loss": 0.27219507098197937, |
|
"eval_runtime": 40.3217, |
|
"eval_samples_per_second": 11.78, |
|
"eval_steps_per_second": 1.488, |
|
"eval_wer": 0.4382480264833206, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5010, |
|
"total_flos": 7.851078607918333e+18, |
|
"train_loss": 0.535597919037718, |
|
"train_runtime": 6681.5115, |
|
"train_samples_per_second": 5.997, |
|
"train_steps_per_second": 0.75 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5010, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.851078607918333e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|