|
{ |
|
"best_metric": 82.66116908888208, |
|
"best_model_checkpoint": "/root/turkic_qa/tr_uzn_models/tr_uzn_xlm_roberta_large_model/checkpoint-3248", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 4060, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 812, |
|
"train_exact_match": 61.63836163836164, |
|
"train_f1": 79.31791325882773, |
|
"train_runtime": 38.4548, |
|
"train_samples_per_second": 43.168, |
|
"train_steps_per_second": 1.56 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 30.129222869873047, |
|
"learning_rate": 1e-05, |
|
"loss": 2.7411, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 59.46875, |
|
"eval_f1": 77.35390101597599, |
|
"eval_runtime": 118.1334, |
|
"eval_samples_per_second": 43.265, |
|
"eval_steps_per_second": 1.549, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1624, |
|
"train_exact_match": 75.62437562437563, |
|
"train_f1": 87.86513543878338, |
|
"train_runtime": 37.8364, |
|
"train_samples_per_second": 43.001, |
|
"train_steps_per_second": 1.559 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 113.6387710571289, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.8103, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 66.59375, |
|
"eval_f1": 81.70014057851566, |
|
"eval_runtime": 118.1729, |
|
"eval_samples_per_second": 43.25, |
|
"eval_steps_per_second": 1.549, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2436, |
|
"train_exact_match": 81.31868131868131, |
|
"train_f1": 91.60071635098949, |
|
"train_runtime": 36.5906, |
|
"train_samples_per_second": 43.153, |
|
"train_steps_per_second": 1.558 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 58.30486297607422, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5305, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 66.9375, |
|
"eval_f1": 82.51782698770623, |
|
"eval_runtime": 118.3438, |
|
"eval_samples_per_second": 43.188, |
|
"eval_steps_per_second": 1.546, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 3248, |
|
"train_exact_match": 83.61638361638362, |
|
"train_f1": 93.04689171723578, |
|
"train_runtime": 36.8939, |
|
"train_samples_per_second": 43.178, |
|
"train_steps_per_second": 1.545 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 11.345389366149902, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.3795, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 67.8125, |
|
"eval_f1": 82.66116908888208, |
|
"eval_runtime": 118.298, |
|
"eval_samples_per_second": 43.204, |
|
"eval_steps_per_second": 1.547, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 4060, |
|
"train_exact_match": 86.91308691308691, |
|
"train_f1": 93.86544501027228, |
|
"train_runtime": 36.964, |
|
"train_samples_per_second": 43.177, |
|
"train_steps_per_second": 1.542 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 37.001564025878906, |
|
"learning_rate": 0.0, |
|
"loss": 0.2878, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 67.5625, |
|
"eval_f1": 82.48067936529503, |
|
"eval_runtime": 118.5494, |
|
"eval_samples_per_second": 43.113, |
|
"eval_steps_per_second": 1.544, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 4060, |
|
"total_flos": 7.910493982198272e+16, |
|
"train_loss": 0.9498688984387027, |
|
"train_runtime": 7336.9641, |
|
"train_samples_per_second": 15.479, |
|
"train_steps_per_second": 0.553 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4060, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 7.910493982198272e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|