|
{ |
|
"best_metric": 73.01711252807637, |
|
"best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_xlm_roberta_base_squad_model/checkpoint-4140", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 6900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 690, |
|
"train_exact_match": 53.84615384615385, |
|
"train_f1": 69.79095478605717, |
|
"train_runtime": 14.0504, |
|
"train_samples_per_second": 89.82, |
|
"train_steps_per_second": 3.274 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 19.47218894958496, |
|
"learning_rate": 5e-06, |
|
"loss": 1.6813, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 52.0625, |
|
"eval_f1": 67.50170309345776, |
|
"eval_runtime": 44.891, |
|
"eval_samples_per_second": 90.263, |
|
"eval_steps_per_second": 3.23, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1380, |
|
"train_exact_match": 63.93606393606394, |
|
"train_f1": 76.38693770706172, |
|
"train_runtime": 14.2008, |
|
"train_samples_per_second": 89.643, |
|
"train_steps_per_second": 3.239 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 45.075042724609375, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3266, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 56.15625, |
|
"eval_f1": 70.83430989837035, |
|
"eval_runtime": 45.0891, |
|
"eval_samples_per_second": 89.867, |
|
"eval_steps_per_second": 3.216, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2070, |
|
"train_exact_match": 67.43256743256744, |
|
"train_f1": 80.45847460323496, |
|
"train_runtime": 14.4622, |
|
"train_samples_per_second": 89.544, |
|
"train_steps_per_second": 3.25 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 17.318368911743164, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 1.1036, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 57.46875, |
|
"eval_f1": 71.70302249821802, |
|
"eval_runtime": 44.9476, |
|
"eval_samples_per_second": 90.149, |
|
"eval_steps_per_second": 3.226, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2760, |
|
"train_exact_match": 71.72827172827172, |
|
"train_f1": 84.90184305868037, |
|
"train_runtime": 14.4996, |
|
"train_samples_per_second": 89.313, |
|
"train_steps_per_second": 3.241 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 217.5539093017578, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.9255, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 58.34375, |
|
"eval_f1": 72.47808689784853, |
|
"eval_runtime": 44.8378, |
|
"eval_samples_per_second": 90.37, |
|
"eval_steps_per_second": 3.234, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3450, |
|
"train_exact_match": 76.32367632367632, |
|
"train_f1": 88.37491424418958, |
|
"train_runtime": 14.4084, |
|
"train_samples_per_second": 85.992, |
|
"train_steps_per_second": 3.123 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 394.8098449707031, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.7806, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 58.21875, |
|
"eval_f1": 72.48290373070347, |
|
"eval_runtime": 45.4975, |
|
"eval_samples_per_second": 89.06, |
|
"eval_steps_per_second": 3.187, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 4140, |
|
"train_exact_match": 78.72127872127872, |
|
"train_f1": 88.97987588000849, |
|
"train_runtime": 14.2096, |
|
"train_samples_per_second": 88.813, |
|
"train_steps_per_second": 3.237 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 17.177759170532227, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6664, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 59.0, |
|
"eval_f1": 73.01711252807637, |
|
"eval_runtime": 45.351, |
|
"eval_samples_per_second": 89.348, |
|
"eval_steps_per_second": 3.197, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 4830, |
|
"train_exact_match": 82.51748251748252, |
|
"train_f1": 91.72028497776134, |
|
"train_runtime": 13.942, |
|
"train_samples_per_second": 89.37, |
|
"train_steps_per_second": 3.228 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 354.42584228515625, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.5911, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 58.53125, |
|
"eval_f1": 72.55506746037935, |
|
"eval_runtime": 45.1047, |
|
"eval_samples_per_second": 89.835, |
|
"eval_steps_per_second": 3.215, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 5520, |
|
"train_exact_match": 81.91808191808192, |
|
"train_f1": 91.29001859546244, |
|
"train_runtime": 14.3482, |
|
"train_samples_per_second": 89.14, |
|
"train_steps_per_second": 3.206 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 184.72279357910156, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.5247, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 58.25, |
|
"eval_f1": 72.07621980566323, |
|
"eval_runtime": 45.0133, |
|
"eval_samples_per_second": 90.018, |
|
"eval_steps_per_second": 3.221, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 6210, |
|
"train_exact_match": 83.21678321678321, |
|
"train_f1": 92.6376023057958, |
|
"train_runtime": 14.5039, |
|
"train_samples_per_second": 89.355, |
|
"train_steps_per_second": 3.24 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.3138372898101807, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.4735, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 58.25, |
|
"eval_f1": 72.13528491061855, |
|
"eval_runtime": 45.1402, |
|
"eval_samples_per_second": 89.765, |
|
"eval_steps_per_second": 3.212, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6900, |
|
"train_exact_match": 86.81318681318682, |
|
"train_f1": 94.26051176344673, |
|
"train_runtime": 14.1643, |
|
"train_samples_per_second": 89.309, |
|
"train_steps_per_second": 3.248 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 12.08403491973877, |
|
"learning_rate": 0.0, |
|
"loss": 0.4451, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 58.25, |
|
"eval_f1": 72.1039896331836, |
|
"eval_runtime": 45.0152, |
|
"eval_samples_per_second": 90.014, |
|
"eval_steps_per_second": 3.221, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6900, |
|
"total_flos": 3.780898745780736e+16, |
|
"train_loss": 0.8518442347429801, |
|
"train_runtime": 4369.5998, |
|
"train_samples_per_second": 44.153, |
|
"train_steps_per_second": 1.579 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.780898745780736e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|