med-alex's picture
End of training
cbd4659 verified
{
"best_metric": 70.80451562424408,
"best_model_checkpoint": "/root/turkic_qa/en_kaz_models/en_kaz_xlm_roberta_base_squad_model/checkpoint-3260",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 6520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"step": 652,
"train_exact_match": 54.645354645354644,
"train_f1": 71.43920046551005,
"train_runtime": 13.6364,
"train_samples_per_second": 89.98,
"train_steps_per_second": 3.227
},
{
"epoch": 1.0,
"grad_norm": 42.09185028076172,
"learning_rate": 5e-06,
"loss": 1.6894,
"step": 652
},
{
"epoch": 1.0,
"eval_exact_match": 51.0,
"eval_f1": 66.75535596211667,
"eval_runtime": 42.9083,
"eval_samples_per_second": 89.913,
"eval_steps_per_second": 3.216,
"step": 652
},
{
"epoch": 2.0,
"step": 1304,
"train_exact_match": 60.03996003996004,
"train_f1": 74.97586392957278,
"train_runtime": 13.9152,
"train_samples_per_second": 89.183,
"train_steps_per_second": 3.234
},
{
"epoch": 2.0,
"grad_norm": 25.847925186157227,
"learning_rate": 1e-05,
"loss": 1.3571,
"step": 1304
},
{
"epoch": 2.0,
"eval_exact_match": 54.0625,
"eval_f1": 69.25389311582437,
"eval_runtime": 43.0882,
"eval_samples_per_second": 89.537,
"eval_steps_per_second": 3.203,
"step": 1304
},
{
"epoch": 3.0,
"step": 1956,
"train_exact_match": 68.23176823176823,
"train_f1": 81.76253341059837,
"train_runtime": 13.5561,
"train_samples_per_second": 88.964,
"train_steps_per_second": 3.246
},
{
"epoch": 3.0,
"grad_norm": 34.729644775390625,
"learning_rate": 8.750000000000001e-06,
"loss": 1.1643,
"step": 1956
},
{
"epoch": 3.0,
"eval_exact_match": 54.9375,
"eval_f1": 69.89098890413919,
"eval_runtime": 43.0049,
"eval_samples_per_second": 89.711,
"eval_steps_per_second": 3.209,
"step": 1956
},
{
"epoch": 4.0,
"step": 2608,
"train_exact_match": 71.82817182817183,
"train_f1": 85.225535613071,
"train_runtime": 14.2911,
"train_samples_per_second": 88.167,
"train_steps_per_second": 3.149
},
{
"epoch": 4.0,
"grad_norm": 33.257789611816406,
"learning_rate": 7.500000000000001e-06,
"loss": 0.9703,
"step": 2608
},
{
"epoch": 4.0,
"eval_exact_match": 56.1875,
"eval_f1": 70.69884423902388,
"eval_runtime": 43.7869,
"eval_samples_per_second": 88.109,
"eval_steps_per_second": 3.152,
"step": 2608
},
{
"epoch": 5.0,
"step": 3260,
"train_exact_match": 76.42357642357642,
"train_f1": 88.7083439838053,
"train_runtime": 13.5599,
"train_samples_per_second": 89.234,
"train_steps_per_second": 3.245
},
{
"epoch": 5.0,
"grad_norm": 29.723169326782227,
"learning_rate": 6.25e-06,
"loss": 0.8246,
"step": 3260
},
{
"epoch": 5.0,
"eval_exact_match": 56.53125,
"eval_f1": 70.80451562424408,
"eval_runtime": 43.1752,
"eval_samples_per_second": 89.357,
"eval_steps_per_second": 3.196,
"step": 3260
},
{
"epoch": 6.0,
"step": 3912,
"train_exact_match": 77.32267732267732,
"train_f1": 89.41850450829368,
"train_runtime": 13.3805,
"train_samples_per_second": 87.964,
"train_steps_per_second": 3.214
},
{
"epoch": 6.0,
"grad_norm": 43.614688873291016,
"learning_rate": 5e-06,
"loss": 0.7106,
"step": 3912
},
{
"epoch": 6.0,
"eval_exact_match": 56.28125,
"eval_f1": 70.69128089372614,
"eval_runtime": 43.3271,
"eval_samples_per_second": 89.044,
"eval_steps_per_second": 3.185,
"step": 3912
},
{
"epoch": 7.0,
"step": 4564,
"train_exact_match": 80.31968031968032,
"train_f1": 90.9932970975269,
"train_runtime": 13.7154,
"train_samples_per_second": 88.222,
"train_steps_per_second": 3.208
},
{
"epoch": 7.0,
"grad_norm": 16.68464469909668,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.6206,
"step": 4564
},
{
"epoch": 7.0,
"eval_exact_match": 56.28125,
"eval_f1": 70.50206404433744,
"eval_runtime": 43.2324,
"eval_samples_per_second": 89.239,
"eval_steps_per_second": 3.192,
"step": 4564
},
{
"epoch": 8.0,
"step": 5216,
"train_exact_match": 82.41758241758242,
"train_f1": 92.16600808934162,
"train_runtime": 13.6769,
"train_samples_per_second": 88.105,
"train_steps_per_second": 3.217
},
{
"epoch": 8.0,
"grad_norm": 30.79519271850586,
"learning_rate": 2.5e-06,
"loss": 0.5603,
"step": 5216
},
{
"epoch": 8.0,
"eval_exact_match": 56.40625,
"eval_f1": 70.75408379302462,
"eval_runtime": 43.2887,
"eval_samples_per_second": 89.123,
"eval_steps_per_second": 3.188,
"step": 5216
},
{
"epoch": 9.0,
"step": 5868,
"train_exact_match": 83.61638361638362,
"train_f1": 93.40487188804495,
"train_runtime": 13.7502,
"train_samples_per_second": 88.508,
"train_steps_per_second": 3.2
},
{
"epoch": 9.0,
"grad_norm": 45.799346923828125,
"learning_rate": 1.25e-06,
"loss": 0.5138,
"step": 5868
},
{
"epoch": 9.0,
"eval_exact_match": 56.46875,
"eval_f1": 70.54295738591223,
"eval_runtime": 43.2261,
"eval_samples_per_second": 89.252,
"eval_steps_per_second": 3.193,
"step": 5868
},
{
"epoch": 10.0,
"step": 6520,
"train_exact_match": 84.31568431568432,
"train_f1": 93.73109223264476,
"train_runtime": 13.3013,
"train_samples_per_second": 88.563,
"train_steps_per_second": 3.233
},
{
"epoch": 10.0,
"grad_norm": 22.400699615478516,
"learning_rate": 0.0,
"loss": 0.4785,
"step": 6520
},
{
"epoch": 10.0,
"eval_exact_match": 56.34375,
"eval_f1": 70.56177522205788,
"eval_runtime": 43.1408,
"eval_samples_per_second": 89.428,
"eval_steps_per_second": 3.199,
"step": 6520
},
{
"epoch": 10.0,
"step": 6520,
"total_flos": 3.575911440121344e+16,
"train_loss": 0.8889602286684001,
"train_runtime": 4193.6079,
"train_samples_per_second": 43.511,
"train_steps_per_second": 1.555
}
],
"logging_steps": 500,
"max_steps": 6520,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 3.575911440121344e+16,
"train_batch_size": 28,
"trial_name": null,
"trial_params": null
}