|
{ |
|
"best_metric": 0.14731654524803162, |
|
"best_model_checkpoint": "./xls-r-1b-bem-sv-male/checkpoint-3500", |
|
"epoch": 4.997397188964081, |
|
"eval_steps": 500, |
|
"global_step": 4800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.20822488287350338, |
|
"grad_norm": 6.451992988586426, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 4.6771, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41644976574700676, |
|
"grad_norm": 3.6971700191497803, |
|
"learning_rate": 3.960000000000001e-05, |
|
"loss": 0.9808, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5205622071837585, |
|
"eval_loss": 0.2580466568470001, |
|
"eval_runtime": 117.6846, |
|
"eval_samples_per_second": 8.922, |
|
"eval_steps_per_second": 2.235, |
|
"eval_wer": 0.8361904761904762, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6246746486205101, |
|
"grad_norm": 3.8861448764801025, |
|
"learning_rate": 4.8883720930232564e-05, |
|
"loss": 0.5099, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8328995314940135, |
|
"grad_norm": 6.251056671142578, |
|
"learning_rate": 4.655813953488372e-05, |
|
"loss": 0.4325, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.041124414367517, |
|
"grad_norm": 2.006500005722046, |
|
"learning_rate": 4.423255813953489e-05, |
|
"loss": 0.3877, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.041124414367517, |
|
"eval_loss": 0.20405621826648712, |
|
"eval_runtime": 117.4432, |
|
"eval_samples_per_second": 8.94, |
|
"eval_steps_per_second": 2.239, |
|
"eval_wer": 0.7790476190476191, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.2493492972410203, |
|
"grad_norm": 1.86452317237854, |
|
"learning_rate": 4.190697674418605e-05, |
|
"loss": 0.3249, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.4575741801145237, |
|
"grad_norm": 2.239264965057373, |
|
"learning_rate": 3.958139534883721e-05, |
|
"loss": 0.298, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.5616866215512752, |
|
"eval_loss": 0.1842915266752243, |
|
"eval_runtime": 118.4618, |
|
"eval_samples_per_second": 8.864, |
|
"eval_steps_per_second": 2.22, |
|
"eval_wer": 0.7619047619047619, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.665799062988027, |
|
"grad_norm": 0.9172901511192322, |
|
"learning_rate": 3.725581395348837e-05, |
|
"loss": 0.2978, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.8740239458615304, |
|
"grad_norm": 2.832592487335205, |
|
"learning_rate": 3.4930232558139534e-05, |
|
"loss": 0.2734, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.082248828735034, |
|
"grad_norm": 1.0995967388153076, |
|
"learning_rate": 3.26046511627907e-05, |
|
"loss": 0.2433, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.082248828735034, |
|
"eval_loss": 0.16212375462055206, |
|
"eval_runtime": 117.6082, |
|
"eval_samples_per_second": 8.928, |
|
"eval_steps_per_second": 2.236, |
|
"eval_wer": 0.700952380952381, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.2904737116085374, |
|
"grad_norm": 1.1086174249649048, |
|
"learning_rate": 3.0279069767441864e-05, |
|
"loss": 0.203, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.4986985944820406, |
|
"grad_norm": 1.091976523399353, |
|
"learning_rate": 2.7953488372093022e-05, |
|
"loss": 0.188, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.6028110359187924, |
|
"eval_loss": 0.1653192788362503, |
|
"eval_runtime": 118.9756, |
|
"eval_samples_per_second": 8.825, |
|
"eval_steps_per_second": 2.211, |
|
"eval_wer": 0.6838095238095238, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.706923477355544, |
|
"grad_norm": 0.8310242295265198, |
|
"learning_rate": 2.5627906976744187e-05, |
|
"loss": 0.1958, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.9151483602290473, |
|
"grad_norm": 1.4682559967041016, |
|
"learning_rate": 2.3302325581395352e-05, |
|
"loss": 0.2035, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.123373243102551, |
|
"grad_norm": 0.5501867532730103, |
|
"learning_rate": 2.0976744186046513e-05, |
|
"loss": 0.1521, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.123373243102551, |
|
"eval_loss": 0.14906759560108185, |
|
"eval_runtime": 118.0235, |
|
"eval_samples_per_second": 8.897, |
|
"eval_steps_per_second": 2.228, |
|
"eval_wer": 0.659047619047619, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.331598125976054, |
|
"grad_norm": 0.9272790551185608, |
|
"learning_rate": 1.8651162790697675e-05, |
|
"loss": 0.1381, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.5398230088495577, |
|
"grad_norm": 0.9619794487953186, |
|
"learning_rate": 1.6325581395348837e-05, |
|
"loss": 0.1316, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.643935450286309, |
|
"eval_loss": 0.14731654524803162, |
|
"eval_runtime": 117.6012, |
|
"eval_samples_per_second": 8.928, |
|
"eval_steps_per_second": 2.236, |
|
"eval_wer": 0.6228571428571429, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.748047891723061, |
|
"grad_norm": 0.7800536155700684, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.127, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.956272774596564, |
|
"grad_norm": 4.820044994354248, |
|
"learning_rate": 1.1674418604651163e-05, |
|
"loss": 0.128, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.164497657470068, |
|
"grad_norm": 0.6071628928184509, |
|
"learning_rate": 9.348837209302326e-06, |
|
"loss": 0.0925, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.164497657470068, |
|
"eval_loss": 0.15515577793121338, |
|
"eval_runtime": 118.2807, |
|
"eval_samples_per_second": 8.877, |
|
"eval_steps_per_second": 2.224, |
|
"eval_wer": 0.6447619047619048, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.372722540343571, |
|
"grad_norm": 0.38652822375297546, |
|
"learning_rate": 7.023255813953489e-06, |
|
"loss": 0.0777, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.580947423217075, |
|
"grad_norm": 0.60927414894104, |
|
"learning_rate": 4.697674418604651e-06, |
|
"loss": 0.0767, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.685059864653827, |
|
"eval_loss": 0.15233619511127472, |
|
"eval_runtime": 118.0689, |
|
"eval_samples_per_second": 8.893, |
|
"eval_steps_per_second": 2.228, |
|
"eval_wer": 0.6228571428571429, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.7891723060905775, |
|
"grad_norm": 0.5293630361557007, |
|
"learning_rate": 2.372093023255814e-06, |
|
"loss": 0.0731, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 4.997397188964081, |
|
"grad_norm": 0.3965218961238861, |
|
"learning_rate": 4.651162790697675e-08, |
|
"loss": 0.0804, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.997397188964081, |
|
"step": 4800, |
|
"total_flos": 2.172568340790461e+19, |
|
"train_loss": 0.42886508484681446, |
|
"train_runtime": 12219.0167, |
|
"train_samples_per_second": 3.144, |
|
"train_steps_per_second": 0.393 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 4800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.172568340790461e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|