{
  "best_metric": 78.54224063649897,
  "best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_mdeberta_base_squad_model/checkpoint-2229",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 7430,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "step": 743,
      "train_exact_match": 60.739260739260736,
      "train_f1": 76.79740346661637,
      "train_runtime": 18.4418,
      "train_samples_per_second": 73.529,
      "train_steps_per_second": 2.657
    },
    {
      "epoch": 1.0,
      "grad_norm": 12.275191307067871,
      "learning_rate": 5e-06,
      "loss": 1.1938,
      "step": 743
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 59.40625,
      "eval_f1": 74.4654162201273,
      "eval_runtime": 58.6195,
      "eval_samples_per_second": 74.583,
      "eval_steps_per_second": 2.678,
      "step": 743
    },
    {
      "epoch": 2.0,
      "step": 1486,
      "train_exact_match": 71.32867132867133,
      "train_f1": 82.88126787145742,
      "train_runtime": 18.7819,
      "train_samples_per_second": 73.901,
      "train_steps_per_second": 2.662
    },
    {
      "epoch": 2.0,
      "grad_norm": 17.339393615722656,
      "learning_rate": 1e-05,
      "loss": 0.9538,
      "step": 1486
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 63.21875,
      "eval_f1": 77.06909842105833,
      "eval_runtime": 59.7376,
      "eval_samples_per_second": 73.187,
      "eval_steps_per_second": 2.628,
      "step": 1486
    },
    {
      "epoch": 3.0,
      "step": 2229,
      "train_exact_match": 72.02797202797203,
      "train_f1": 85.06033245209376,
      "train_runtime": 19.1902,
      "train_samples_per_second": 72.954,
      "train_steps_per_second": 2.605
    },
    {
      "epoch": 3.0,
      "grad_norm": 31.182722091674805,
      "learning_rate": 8.750000000000001e-06,
      "loss": 0.7754,
      "step": 2229
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 65.03125,
      "eval_f1": 78.54224063649897,
      "eval_runtime": 60.2459,
      "eval_samples_per_second": 72.569,
      "eval_steps_per_second": 2.606,
      "step": 2229
    },
    {
      "epoch": 4.0,
      "step": 2972,
      "train_exact_match": 77.42257742257742,
      "train_f1": 89.5672832756712,
      "train_runtime": 18.9023,
      "train_samples_per_second": 73.483,
      "train_steps_per_second": 2.645
    },
    {
      "epoch": 4.0,
      "grad_norm": 31.596288681030273,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.6219,
      "step": 2972
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 65.0,
      "eval_f1": 78.32854487938292,
      "eval_runtime": 59.9399,
      "eval_samples_per_second": 72.94,
      "eval_steps_per_second": 2.619,
      "step": 2972
    },
    {
      "epoch": 5.0,
      "step": 3715,
      "train_exact_match": 78.92107892107892,
      "train_f1": 90.10872697449445,
      "train_runtime": 18.4815,
      "train_samples_per_second": 71.964,
      "train_steps_per_second": 2.597
    },
    {
      "epoch": 5.0,
      "grad_norm": 18.16082000732422,
      "learning_rate": 6.25e-06,
      "loss": 0.5195,
      "step": 3715
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 64.40625,
      "eval_f1": 78.09634918973954,
      "eval_runtime": 59.7181,
      "eval_samples_per_second": 73.211,
      "eval_steps_per_second": 2.629,
      "step": 3715
    },
    {
      "epoch": 6.0,
      "step": 4458,
      "train_exact_match": 82.61738261738262,
      "train_f1": 91.68526538584675,
      "train_runtime": 18.7765,
      "train_samples_per_second": 72.005,
      "train_steps_per_second": 2.61
    },
    {
      "epoch": 6.0,
      "grad_norm": 23.594913482666016,
      "learning_rate": 5e-06,
      "loss": 0.4385,
      "step": 4458
    },
    {
      "epoch": 6.0,
      "eval_exact_match": 64.59375,
      "eval_f1": 77.90198302625599,
      "eval_runtime": 60.4426,
      "eval_samples_per_second": 72.333,
      "eval_steps_per_second": 2.598,
      "step": 4458
    },
    {
      "epoch": 7.0,
      "step": 5201,
      "train_exact_match": 84.81518481518482,
      "train_f1": 93.31685648855274,
      "train_runtime": 18.5303,
      "train_samples_per_second": 72.206,
      "train_steps_per_second": 2.59
    },
    {
      "epoch": 7.0,
      "grad_norm": 28.661422729492188,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.3756,
      "step": 5201
    },
    {
      "epoch": 7.0,
      "eval_exact_match": 64.34375,
      "eval_f1": 77.75398362289235,
      "eval_runtime": 60.0044,
      "eval_samples_per_second": 72.861,
      "eval_steps_per_second": 2.616,
      "step": 5201
    },
    {
      "epoch": 8.0,
      "step": 5944,
      "train_exact_match": 84.01598401598402,
      "train_f1": 92.96191804754487,
      "train_runtime": 19.1375,
      "train_samples_per_second": 71.953,
      "train_steps_per_second": 2.613
    },
    {
      "epoch": 8.0,
      "grad_norm": 36.12919998168945,
      "learning_rate": 2.5e-06,
      "loss": 0.3313,
      "step": 5944
    },
    {
      "epoch": 8.0,
      "eval_exact_match": 64.40625,
      "eval_f1": 77.95679362066461,
      "eval_runtime": 59.8689,
      "eval_samples_per_second": 73.026,
      "eval_steps_per_second": 2.622,
      "step": 5944
    },
    {
      "epoch": 9.0,
      "step": 6687,
      "train_exact_match": 87.01298701298701,
      "train_f1": 94.75304040815107,
      "train_runtime": 19.6099,
      "train_samples_per_second": 71.443,
      "train_steps_per_second": 2.601
    },
    {
      "epoch": 9.0,
      "grad_norm": 7.826128959655762,
      "learning_rate": 1.25e-06,
      "loss": 0.2966,
      "step": 6687
    },
    {
      "epoch": 9.0,
      "eval_exact_match": 64.40625,
      "eval_f1": 77.80886757487892,
      "eval_runtime": 59.9613,
      "eval_samples_per_second": 72.914,
      "eval_steps_per_second": 2.618,
      "step": 6687
    },
    {
      "epoch": 10.0,
      "step": 7430,
      "train_exact_match": 87.81218781218782,
      "train_f1": 95.03010006914994,
      "train_runtime": 19.1654,
      "train_samples_per_second": 71.274,
      "train_steps_per_second": 2.557
    },
    {
      "epoch": 10.0,
      "grad_norm": 16.050716400146484,
      "learning_rate": 0.0,
      "loss": 0.2775,
      "step": 7430
    },
    {
      "epoch": 10.0,
      "eval_exact_match": 64.21875,
      "eval_f1": 77.80834688072528,
      "eval_runtime": 60.7473,
      "eval_samples_per_second": 71.97,
      "eval_steps_per_second": 2.584,
      "step": 7430
    },
    {
      "epoch": 10.0,
      "step": 7430,
      "total_flos": 4.073559349625856e+16,
      "train_loss": 0.5783894863616409,
      "train_runtime": 6220.8095,
      "train_samples_per_second": 33.414,
      "train_steps_per_second": 1.194
    }
  ],
  "logging_steps": 500,
  "max_steps": 7430,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 4.073559349625856e+16,
  "train_batch_size": 28,
  "trial_name": null,
  "trial_params": null
}