{
  "best_metric": 0.2201576977968216,
  "best_model_checkpoint": "/scratch/skscla001/speech/results/xls-r-1b-bemgen-female-model/checkpoint-1300",
  "epoch": 5.987654320987654,
  "eval_steps": 100,
  "global_step": 1700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3527336860670194,
      "eval_loss": 4.0290117263793945,
      "eval_runtime": 20.5733,
      "eval_samples_per_second": 13.367,
      "eval_steps_per_second": 3.354,
      "eval_wer": 1.0089621277825962,
      "step": 100
    },
    {
      "epoch": 0.7054673721340388,
      "eval_loss": 2.847311019897461,
      "eval_runtime": 20.3386,
      "eval_samples_per_second": 13.521,
      "eval_steps_per_second": 3.393,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 1.056437389770723,
      "eval_loss": 0.6011506915092468,
      "eval_runtime": 20.4093,
      "eval_samples_per_second": 13.474,
      "eval_steps_per_second": 3.381,
      "eval_wer": 0.9268574732581671,
      "step": 300
    },
    {
      "epoch": 1.4091710758377425,
      "eval_loss": 0.41906026005744934,
      "eval_runtime": 20.4439,
      "eval_samples_per_second": 13.451,
      "eval_steps_per_second": 3.375,
      "eval_wer": 0.8378143972246314,
      "step": 400
    },
    {
      "epoch": 1.7619047619047619,
      "grad_norm": 2.2041492462158203,
      "learning_rate": 2.976e-05,
      "loss": 4.9128,
      "step": 500
    },
    {
      "epoch": 1.7619047619047619,
      "eval_loss": 0.33197441697120667,
      "eval_runtime": 20.5028,
      "eval_samples_per_second": 13.413,
      "eval_steps_per_second": 3.365,
      "eval_wer": 0.6423821913847932,
      "step": 500
    },
    {
      "epoch": 2.112874779541446,
      "eval_loss": 0.2715909779071808,
      "eval_runtime": 20.5575,
      "eval_samples_per_second": 13.377,
      "eval_steps_per_second": 3.356,
      "eval_wer": 0.5177797051170858,
      "step": 600
    },
    {
      "epoch": 2.4656084656084656,
      "eval_loss": 0.27242884039878845,
      "eval_runtime": 20.5374,
      "eval_samples_per_second": 13.39,
      "eval_steps_per_second": 3.36,
      "eval_wer": 0.4929170280427869,
      "step": 700
    },
    {
      "epoch": 2.818342151675485,
      "eval_loss": 0.2515610158443451,
      "eval_runtime": 20.5916,
      "eval_samples_per_second": 13.355,
      "eval_steps_per_second": 3.351,
      "eval_wer": 0.4787510841283608,
      "step": 800
    },
    {
      "epoch": 3.1693121693121693,
      "eval_loss": 0.2385055273771286,
      "eval_runtime": 20.5752,
      "eval_samples_per_second": 13.366,
      "eval_steps_per_second": 3.354,
      "eval_wer": 0.44376987568661463,
      "step": 900
    },
    {
      "epoch": 3.5220458553791887,
      "grad_norm": 2.694538116455078,
      "learning_rate": 2.8137672090112644e-05,
      "loss": 0.4407,
      "step": 1000
    },
    {
      "epoch": 3.5220458553791887,
      "eval_loss": 0.2374495565891266,
      "eval_runtime": 20.582,
      "eval_samples_per_second": 13.361,
      "eval_steps_per_second": 3.352,
      "eval_wer": 0.43451864700780574,
      "step": 1000
    },
    {
      "epoch": 3.874779541446208,
      "eval_loss": 0.2354438155889511,
      "eval_runtime": 20.5688,
      "eval_samples_per_second": 13.37,
      "eval_steps_per_second": 3.355,
      "eval_wer": 0.4096559699335068,
      "step": 1100
    },
    {
      "epoch": 4.225749559082892,
      "eval_loss": 0.22052845358848572,
      "eval_runtime": 20.5553,
      "eval_samples_per_second": 13.379,
      "eval_steps_per_second": 3.357,
      "eval_wer": 0.3960682278115062,
      "step": 1200
    },
    {
      "epoch": 4.578483245149911,
      "eval_loss": 0.2201576977968216,
      "eval_runtime": 20.6429,
      "eval_samples_per_second": 13.322,
      "eval_steps_per_second": 3.343,
      "eval_wer": 0.3897080080948251,
      "step": 1300
    },
    {
      "epoch": 4.931216931216931,
      "eval_loss": 0.22464053332805634,
      "eval_runtime": 20.524,
      "eval_samples_per_second": 13.399,
      "eval_steps_per_second": 3.362,
      "eval_wer": 0.3897080080948251,
      "step": 1400
    },
    {
      "epoch": 5.2821869488536155,
      "grad_norm": 1.8704134225845337,
      "learning_rate": 2.626032540675845e-05,
      "loss": 0.2698,
      "step": 1500
    },
    {
      "epoch": 5.2821869488536155,
      "eval_loss": 0.2338586151599884,
      "eval_runtime": 20.5858,
      "eval_samples_per_second": 13.359,
      "eval_steps_per_second": 3.352,
      "eval_wer": 0.36657993639780284,
      "step": 1500
    },
    {
      "epoch": 5.634920634920634,
      "eval_loss": 0.2358291894197464,
      "eval_runtime": 20.5183,
      "eval_samples_per_second": 13.403,
      "eval_steps_per_second": 3.363,
      "eval_wer": 0.3735183579069095,
      "step": 1600
    },
    {
      "epoch": 5.987654320987654,
      "eval_loss": 0.22457349300384521,
      "eval_runtime": 20.6137,
      "eval_samples_per_second": 13.341,
      "eval_steps_per_second": 3.347,
      "eval_wer": 0.38074588031222895,
      "step": 1700
    },
    {
      "epoch": 5.987654320987654,
      "step": 1700,
      "total_flos": 1.0310954059792316e+19,
      "train_loss": 1.6788882132137524,
      "train_runtime": 2349.413,
      "train_samples_per_second": 28.922,
      "train_steps_per_second": 3.614
    }
  ],
  "logging_steps": 500,
  "max_steps": 8490,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 400,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 4,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0310954059792316e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}