|
{ |
|
"best_metric": 78.83324934908322, |
|
"best_model_checkpoint": "/root/turkic_qa/tr_kaz_models/tr_kaz_xlm_roberta_base_model/checkpoint-7490", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 7490, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 749, |
|
"train_exact_match": 24.275724275724276, |
|
"train_f1": 43.117743763484455, |
|
"train_runtime": 19.0896, |
|
"train_samples_per_second": 79.624, |
|
"train_steps_per_second": 2.881 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 45.69759750366211, |
|
"learning_rate": 5e-06, |
|
"loss": 4.4853, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 25.875, |
|
"eval_f1": 44.604620754612284, |
|
"eval_runtime": 56.7402, |
|
"eval_samples_per_second": 81.389, |
|
"eval_steps_per_second": 2.908, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1498, |
|
"train_exact_match": 55.744255744255746, |
|
"train_f1": 73.08153342630565, |
|
"train_runtime": 17.3477, |
|
"train_samples_per_second": 85.429, |
|
"train_steps_per_second": 3.055 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 27.808273315429688, |
|
"learning_rate": 1e-05, |
|
"loss": 1.7458, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 52.15625, |
|
"eval_f1": 70.19574840484874, |
|
"eval_runtime": 53.2765, |
|
"eval_samples_per_second": 86.68, |
|
"eval_steps_per_second": 3.097, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2247, |
|
"train_exact_match": 65.83416583416583, |
|
"train_f1": 81.64944541515409, |
|
"train_runtime": 17.576, |
|
"train_samples_per_second": 85.173, |
|
"train_steps_per_second": 3.072 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 26.646291732788086, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 1.1281, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 58.0625, |
|
"eval_f1": 75.09391733824289, |
|
"eval_runtime": 56.1114, |
|
"eval_samples_per_second": 82.301, |
|
"eval_steps_per_second": 2.941, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2996, |
|
"train_exact_match": 69.43056943056943, |
|
"train_f1": 84.4692905630636, |
|
"train_runtime": 19.0377, |
|
"train_samples_per_second": 79.106, |
|
"train_steps_per_second": 2.836 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 22.081819534301758, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.8784, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 60.25, |
|
"eval_f1": 76.8410082950037, |
|
"eval_runtime": 57.4346, |
|
"eval_samples_per_second": 80.405, |
|
"eval_steps_per_second": 2.873, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3745, |
|
"train_exact_match": 74.12587412587412, |
|
"train_f1": 87.57278612666818, |
|
"train_runtime": 18.9437, |
|
"train_samples_per_second": 78.971, |
|
"train_steps_per_second": 2.851 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 27.527299880981445, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.7365, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 60.96875, |
|
"eval_f1": 77.33365402370875, |
|
"eval_runtime": 57.2578, |
|
"eval_samples_per_second": 80.653, |
|
"eval_steps_per_second": 2.882, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 4494, |
|
"train_exact_match": 78.42157842157842, |
|
"train_f1": 89.99797070222537, |
|
"train_runtime": 19.0634, |
|
"train_samples_per_second": 78.265, |
|
"train_steps_per_second": 2.833 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 26.452974319458008, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6194, |
|
"step": 4494 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 61.96875, |
|
"eval_f1": 77.91252095789356, |
|
"eval_runtime": 57.9105, |
|
"eval_samples_per_second": 79.744, |
|
"eval_steps_per_second": 2.849, |
|
"step": 4494 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 5243, |
|
"train_exact_match": 82.81718281718281, |
|
"train_f1": 92.72768225182865, |
|
"train_runtime": 18.2446, |
|
"train_samples_per_second": 80.956, |
|
"train_steps_per_second": 2.905 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 18.254165649414062, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.534, |
|
"step": 5243 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 62.21875, |
|
"eval_f1": 78.17772368782505, |
|
"eval_runtime": 55.9458, |
|
"eval_samples_per_second": 82.544, |
|
"eval_steps_per_second": 2.949, |
|
"step": 5243 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 5992, |
|
"train_exact_match": 80.41958041958041, |
|
"train_f1": 91.49491612552417, |
|
"train_runtime": 18.2728, |
|
"train_samples_per_second": 84.059, |
|
"train_steps_per_second": 3.01 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 35.931488037109375, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.4747, |
|
"step": 5992 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 62.53125, |
|
"eval_f1": 78.2110139694925, |
|
"eval_runtime": 56.5716, |
|
"eval_samples_per_second": 81.631, |
|
"eval_steps_per_second": 2.917, |
|
"step": 5992 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 6741, |
|
"train_exact_match": 82.61738261738262, |
|
"train_f1": 92.86226576706642, |
|
"train_runtime": 18.569, |
|
"train_samples_per_second": 79.218, |
|
"train_steps_per_second": 2.854 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 60.296566009521484, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.4304, |
|
"step": 6741 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 63.0625, |
|
"eval_f1": 78.5170445431728, |
|
"eval_runtime": 57.3991, |
|
"eval_samples_per_second": 80.454, |
|
"eval_steps_per_second": 2.875, |
|
"step": 6741 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 7490, |
|
"train_exact_match": 84.91508491508492, |
|
"train_f1": 93.37423644646913, |
|
"train_runtime": 18.377, |
|
"train_samples_per_second": 81.297, |
|
"train_steps_per_second": 2.938 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 23.199573516845703, |
|
"learning_rate": 0.0, |
|
"loss": 0.3964, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 63.28125, |
|
"eval_f1": 78.83324934908322, |
|
"eval_runtime": 55.9599, |
|
"eval_samples_per_second": 82.523, |
|
"eval_steps_per_second": 2.949, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 7490, |
|
"total_flos": 4.108760851295232e+16, |
|
"train_loss": 1.1428948192316317, |
|
"train_runtime": 4968.485, |
|
"train_samples_per_second": 42.198, |
|
"train_steps_per_second": 1.508 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7490, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 4.108760851295232e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|