|
{ |
|
"best_metric": 78.2193331519916, |
|
"best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_xlm_roberta_large_model/checkpoint-2760", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 3450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 690, |
|
"train_exact_match": 56.043956043956044, |
|
"train_f1": 73.10238632897224, |
|
"train_runtime": 28.967, |
|
"train_samples_per_second": 43.567, |
|
"train_steps_per_second": 1.588 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 34.386253356933594, |
|
"learning_rate": 1e-05, |
|
"loss": 3.1538, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 53.28125, |
|
"eval_f1": 68.90784509260168, |
|
"eval_runtime": 92.5664, |
|
"eval_samples_per_second": 43.774, |
|
"eval_steps_per_second": 1.566, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1380, |
|
"train_exact_match": 71.22877122877122, |
|
"train_f1": 84.717686864544, |
|
"train_runtime": 29.1704, |
|
"train_samples_per_second": 43.64, |
|
"train_steps_per_second": 1.577 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 118.70819854736328, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.105, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 63.1875, |
|
"eval_f1": 77.04679698006159, |
|
"eval_runtime": 92.8422, |
|
"eval_samples_per_second": 43.644, |
|
"eval_steps_per_second": 1.562, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2070, |
|
"train_exact_match": 76.62337662337663, |
|
"train_f1": 88.54201062497397, |
|
"train_runtime": 29.7625, |
|
"train_samples_per_second": 43.511, |
|
"train_steps_per_second": 1.579 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 41.994937896728516, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7535, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 64.125, |
|
"eval_f1": 77.84371511436862, |
|
"eval_runtime": 92.7563, |
|
"eval_samples_per_second": 43.684, |
|
"eval_steps_per_second": 1.563, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2760, |
|
"train_exact_match": 82.01798201798202, |
|
"train_f1": 91.71025821891249, |
|
"train_runtime": 29.7394, |
|
"train_samples_per_second": 43.545, |
|
"train_steps_per_second": 1.58 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 416.8191223144531, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.5495, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 65.03125, |
|
"eval_f1": 78.2193331519916, |
|
"eval_runtime": 92.5461, |
|
"eval_samples_per_second": 43.784, |
|
"eval_steps_per_second": 1.567, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3450, |
|
"train_exact_match": 83.71628371628371, |
|
"train_f1": 92.89072366821784, |
|
"train_runtime": 28.6071, |
|
"train_samples_per_second": 43.311, |
|
"train_steps_per_second": 1.573 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.402435779571533, |
|
"learning_rate": 0.0, |
|
"loss": 0.418, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 64.875, |
|
"eval_f1": 78.00354149557015, |
|
"eval_runtime": 92.9043, |
|
"eval_samples_per_second": 43.615, |
|
"eval_steps_per_second": 1.561, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3450, |
|
"total_flos": 6.719078999672064e+16, |
|
"train_loss": 1.1959592426687047, |
|
"train_runtime": 6180.983, |
|
"train_samples_per_second": 15.607, |
|
"train_steps_per_second": 0.558 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 6.719078999672064e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|