|
{ |
|
"best_metric": 44.04121981988171, |
|
"best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/orig_uzn_roberta_base_model/checkpoint-5430", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 5430, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 543, |
|
"train_exact_match": 6.993006993006993, |
|
"train_f1": 14.72956145700345, |
|
"train_runtime": 8.9919, |
|
"train_samples_per_second": 114.102, |
|
"train_steps_per_second": 4.115 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 22.987104415893555, |
|
"learning_rate": 5e-06, |
|
"loss": 4.7716, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 6.65625, |
|
"eval_f1": 13.460548189578436, |
|
"eval_runtime": 28.875, |
|
"eval_samples_per_second": 112.935, |
|
"eval_steps_per_second": 4.052, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1086, |
|
"train_exact_match": 12.987012987012987, |
|
"train_f1": 21.035135926338754, |
|
"train_runtime": 9.0115, |
|
"train_samples_per_second": 112.523, |
|
"train_steps_per_second": 4.106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 27.96239471435547, |
|
"learning_rate": 1e-05, |
|
"loss": 3.7695, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 10.71875, |
|
"eval_f1": 17.87426936207211, |
|
"eval_runtime": 28.7945, |
|
"eval_samples_per_second": 113.251, |
|
"eval_steps_per_second": 4.063, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1629, |
|
"train_exact_match": 25.174825174825173, |
|
"train_f1": 34.8258899586999, |
|
"train_runtime": 9.0183, |
|
"train_samples_per_second": 113.214, |
|
"train_steps_per_second": 4.103 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 38.839942932128906, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 3.4391, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 15.84375, |
|
"eval_f1": 25.41741224618572, |
|
"eval_runtime": 29.0024, |
|
"eval_samples_per_second": 112.439, |
|
"eval_steps_per_second": 4.034, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2172, |
|
"train_exact_match": 29.97002997002997, |
|
"train_f1": 41.494806937045226, |
|
"train_runtime": 9.0706, |
|
"train_samples_per_second": 112.671, |
|
"train_steps_per_second": 4.079 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 35.55900955200195, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 2.9975, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 20.4375, |
|
"eval_f1": 31.800313776639523, |
|
"eval_runtime": 28.9344, |
|
"eval_samples_per_second": 112.703, |
|
"eval_steps_per_second": 4.044, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 2715, |
|
"train_exact_match": 38.86113886113886, |
|
"train_f1": 51.45493720149751, |
|
"train_runtime": 9.0591, |
|
"train_samples_per_second": 112.483, |
|
"train_steps_per_second": 4.084 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 60.02823257446289, |
|
"learning_rate": 6.25e-06, |
|
"loss": 2.6357, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 23.65625, |
|
"eval_f1": 36.81284091086545, |
|
"eval_runtime": 28.8941, |
|
"eval_samples_per_second": 112.861, |
|
"eval_steps_per_second": 4.049, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 3258, |
|
"train_exact_match": 47.752247752247754, |
|
"train_f1": 60.68401873806664, |
|
"train_runtime": 8.9792, |
|
"train_samples_per_second": 113.373, |
|
"train_steps_per_second": 4.121 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 37.850128173828125, |
|
"learning_rate": 5e-06, |
|
"loss": 2.3155, |
|
"step": 3258 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 26.125, |
|
"eval_f1": 40.40349142174044, |
|
"eval_runtime": 29.0794, |
|
"eval_samples_per_second": 112.141, |
|
"eval_steps_per_second": 4.023, |
|
"step": 3258 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 3801, |
|
"train_exact_match": 50.44955044955045, |
|
"train_f1": 64.76949286642217, |
|
"train_runtime": 8.8774, |
|
"train_samples_per_second": 113.547, |
|
"train_steps_per_second": 4.055 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 51.20785140991211, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 2.0695, |
|
"step": 3801 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 27.8125, |
|
"eval_f1": 42.61264490274304, |
|
"eval_runtime": 29.0203, |
|
"eval_samples_per_second": 112.37, |
|
"eval_steps_per_second": 4.032, |
|
"step": 3801 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 4344, |
|
"train_exact_match": 51.34865134865135, |
|
"train_f1": 65.11930431582645, |
|
"train_runtime": 8.9923, |
|
"train_samples_per_second": 113.208, |
|
"train_steps_per_second": 4.115 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 56.128509521484375, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.8975, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 29.125, |
|
"eval_f1": 43.50290837664036, |
|
"eval_runtime": 28.9786, |
|
"eval_samples_per_second": 112.531, |
|
"eval_steps_per_second": 4.037, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 4887, |
|
"train_exact_match": 57.24275724275724, |
|
"train_f1": 69.40786106942967, |
|
"train_runtime": 9.0219, |
|
"train_samples_per_second": 112.615, |
|
"train_steps_per_second": 4.101 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 60.7649040222168, |
|
"learning_rate": 1.25e-06, |
|
"loss": 1.7833, |
|
"step": 4887 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 29.1875, |
|
"eval_f1": 43.93033202135494, |
|
"eval_runtime": 29.1076, |
|
"eval_samples_per_second": 112.033, |
|
"eval_steps_per_second": 4.02, |
|
"step": 4887 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5430, |
|
"train_exact_match": 58.04195804195804, |
|
"train_f1": 70.9609187667352, |
|
"train_runtime": 9.0761, |
|
"train_samples_per_second": 112.383, |
|
"train_steps_per_second": 4.077 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 42.582576751708984, |
|
"learning_rate": 0.0, |
|
"loss": 1.7125, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 29.1875, |
|
"eval_f1": 44.04121981988171, |
|
"eval_runtime": 28.9802, |
|
"eval_samples_per_second": 112.525, |
|
"eval_steps_per_second": 4.037, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5430, |
|
"total_flos": 1.487387554039296e+16, |
|
"train_loss": 2.7391692272207355, |
|
"train_runtime": 1969.257, |
|
"train_samples_per_second": 77.08, |
|
"train_steps_per_second": 2.757 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5430, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.487387554039296e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|