med-alex's picture
End of training
1d9b705 verified
{
"best_metric": 80.33013168664009,
"best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_large_squad_model/checkpoint-2790",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 2790,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"step": 558,
"train_exact_match": 64.83516483516483,
"train_f1": 83.08204401574274,
"train_runtime": 24.0955,
"train_samples_per_second": 43.826,
"train_steps_per_second": 1.577
},
{
"epoch": 1.0,
"grad_norm": 80.57772827148438,
"learning_rate": 1e-05,
"loss": 1.2698,
"step": 558
},
{
"epoch": 1.0,
"eval_exact_match": 61.25,
"eval_f1": 79.38877529103148,
"eval_runtime": 76.3721,
"eval_samples_per_second": 43.982,
"eval_steps_per_second": 1.571,
"step": 558
},
{
"epoch": 2.0,
"step": 1116,
"train_exact_match": 74.82517482517483,
"train_f1": 89.2404305639639,
"train_runtime": 24.2914,
"train_samples_per_second": 42.855,
"train_steps_per_second": 1.564
},
{
"epoch": 2.0,
"grad_norm": 80.02055358886719,
"learning_rate": 7.500000000000001e-06,
"loss": 0.8886,
"step": 1116
},
{
"epoch": 2.0,
"eval_exact_match": 62.3125,
"eval_f1": 80.2126480616015,
"eval_runtime": 77.9968,
"eval_samples_per_second": 43.066,
"eval_steps_per_second": 1.539,
"step": 1116
},
{
"epoch": 3.0,
"step": 1674,
"train_exact_match": 78.52147852147853,
"train_f1": 92.40489073606565,
"train_runtime": 25.6503,
"train_samples_per_second": 41.013,
"train_steps_per_second": 1.481
},
{
"epoch": 3.0,
"grad_norm": 28.389789581298828,
"learning_rate": 5e-06,
"loss": 0.6286,
"step": 1674
},
{
"epoch": 3.0,
"eval_exact_match": 61.9375,
"eval_f1": 80.06485926873198,
"eval_runtime": 83.2186,
"eval_samples_per_second": 40.364,
"eval_steps_per_second": 1.442,
"step": 1674
},
{
"epoch": 4.0,
"step": 2232,
"train_exact_match": 82.31768231768231,
"train_f1": 93.82612819903538,
"train_runtime": 23.9529,
"train_samples_per_second": 43.669,
"train_steps_per_second": 1.586
},
{
"epoch": 4.0,
"grad_norm": 21.804250717163086,
"learning_rate": 2.5e-06,
"loss": 0.4725,
"step": 2232
},
{
"epoch": 4.0,
"eval_exact_match": 62.625,
"eval_f1": 80.25175708657538,
"eval_runtime": 77.0148,
"eval_samples_per_second": 43.615,
"eval_steps_per_second": 1.558,
"step": 2232
},
{
"epoch": 5.0,
"step": 2790,
"train_exact_match": 85.21478521478521,
"train_f1": 94.39980772824222,
"train_runtime": 24.3057,
"train_samples_per_second": 42.871,
"train_steps_per_second": 1.563
},
{
"epoch": 5.0,
"grad_norm": 48.04233932495117,
"learning_rate": 0.0,
"loss": 0.378,
"step": 2790
},
{
"epoch": 5.0,
"eval_exact_match": 62.46875,
"eval_f1": 80.33013168664009,
"eval_runtime": 78.9765,
"eval_samples_per_second": 42.532,
"eval_steps_per_second": 1.519,
"step": 2790
},
{
"epoch": 5.0,
"step": 2790,
"total_flos": 5.432587638826752e+16,
"train_loss": 0.7275064119728663,
"train_runtime": 5090.5681,
"train_samples_per_second": 15.321,
"train_steps_per_second": 0.548
}
],
"logging_steps": 500,
"max_steps": 2790,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 5.432587638826752e+16,
"train_batch_size": 28,
"trial_name": null,
"trial_params": null
}