|
{ |
|
"best_metric": 96.7742, |
|
"best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_tapex_e2_codet5p-220m_latex/checkpoint-2400", |
|
"epoch": 1.8608919814703544, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.070409429280397e-05, |
|
"loss": 1.8932, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_exact_match": 69.7581, |
|
"eval_loss": 0.006803931202739477, |
|
"eval_runtime": 182.7837, |
|
"eval_samples_per_second": 2.735, |
|
"eval_steps_per_second": 0.175, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.140818858560795e-05, |
|
"loss": 0.0034, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_exact_match": 86.4919, |
|
"eval_loss": 0.0015783495036885142, |
|
"eval_runtime": 139.3342, |
|
"eval_samples_per_second": 3.588, |
|
"eval_steps_per_second": 0.23, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.211228287841191e-05, |
|
"loss": 0.0012, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_exact_match": 92.5403, |
|
"eval_loss": 0.00091337546473369, |
|
"eval_runtime": 129.2451, |
|
"eval_samples_per_second": 3.869, |
|
"eval_steps_per_second": 0.248, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.28163771712159e-05, |
|
"loss": 0.0008, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_exact_match": 94.3548, |
|
"eval_loss": 0.0006592237623408437, |
|
"eval_runtime": 130.2486, |
|
"eval_samples_per_second": 3.839, |
|
"eval_steps_per_second": 0.246, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5.352047146401985e-05, |
|
"loss": 0.0006, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_exact_match": 94.5565, |
|
"eval_loss": 0.0005109157646074891, |
|
"eval_runtime": 127.9056, |
|
"eval_samples_per_second": 3.909, |
|
"eval_steps_per_second": 0.25, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.422456575682382e-05, |
|
"loss": 0.0004, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_exact_match": 95.3629, |
|
"eval_loss": 0.0004209030594211072, |
|
"eval_runtime": 129.0896, |
|
"eval_samples_per_second": 3.873, |
|
"eval_steps_per_second": 0.248, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.4928660049627796e-05, |
|
"loss": 0.0004, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_exact_match": 95.1613, |
|
"eval_loss": 0.00042048218892887235, |
|
"eval_runtime": 129.0282, |
|
"eval_samples_per_second": 3.875, |
|
"eval_steps_per_second": 0.248, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.563275434243176e-05, |
|
"loss": 0.0003, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_exact_match": 96.7742, |
|
"eval_loss": 0.0003429962380323559, |
|
"eval_runtime": 127.2593, |
|
"eval_samples_per_second": 3.929, |
|
"eval_steps_per_second": 0.251, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.6336848635235734e-05, |
|
"loss": 0.0003, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_exact_match": 96.7742, |
|
"eval_loss": 0.00032032810850068927, |
|
"eval_runtime": 126.2849, |
|
"eval_samples_per_second": 3.959, |
|
"eval_steps_per_second": 0.253, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.040942928039701e-06, |
|
"loss": 0.0003, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_exact_match": 96.371, |
|
"eval_loss": 0.0003172786091454327, |
|
"eval_runtime": 128.5411, |
|
"eval_samples_per_second": 3.89, |
|
"eval_steps_per_second": 0.249, |
|
"step": 3000 |
|
} |
|
], |
|
"max_steps": 3224, |
|
"num_train_epochs": 2, |
|
"total_flos": 3.7417880393456026e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|