|
{ |
|
"best_metric": 83.11268652174739, |
|
"best_model_checkpoint": "/home/fbravo/data/all_results/sqac/albeto_xxlarge/epochs_3_bs_16_lr_5e-6/checkpoint-2000", |
|
"epoch": 3.0, |
|
"global_step": 3114, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"eval_exact_match": 50.21459227467811, |
|
"eval_f1": 72.15606343917293, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_exact_match": 57.8862660944206, |
|
"eval_f1": 77.27610294693481, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.205202312138729e-06, |
|
"loss": 1.7198, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_exact_match": 61.26609442060086, |
|
"eval_f1": 79.60226388707036, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_exact_match": 60.56866952789699, |
|
"eval_f1": 80.21878308712405, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.402376364804111e-06, |
|
"loss": 0.9328, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_exact_match": 64.10944206008584, |
|
"eval_f1": 81.45706053754708, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_exact_match": 64.431330472103, |
|
"eval_f1": 82.10448987052378, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_exact_match": 65.93347639484979, |
|
"eval_f1": 82.46788441647152, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.599550417469493e-06, |
|
"loss": 0.5496, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_exact_match": 65.50429184549357, |
|
"eval_f1": 82.69463199305457, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_exact_match": 65.50429184549357, |
|
"eval_f1": 82.47924185410717, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.798330122029544e-06, |
|
"loss": 0.5137, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_exact_match": 66.1480686695279, |
|
"eval_f1": 83.11268652174739, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_exact_match": 66.25536480686695, |
|
"eval_f1": 82.81247091268429, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_exact_match": 65.98712446351931, |
|
"eval_f1": 82.52306189501985, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.955041746949263e-07, |
|
"loss": 0.2552, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_exact_match": 66.04077253218884, |
|
"eval_f1": 82.98092243697893, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_exact_match": 66.46995708154506, |
|
"eval_f1": 82.55808756053281, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9267822736030828e-07, |
|
"loss": 0.2252, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_exact_match": 66.25536480686695, |
|
"eval_f1": 82.8710129087105, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 3114, |
|
"total_flos": 2.835542958722253e+16, |
|
"train_loss": 0.6812070615029748, |
|
"train_runtime": 1785.0892, |
|
"train_samples_per_second": 27.894, |
|
"train_steps_per_second": 1.744 |
|
} |
|
], |
|
"max_steps": 3114, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.835542958722253e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|