|
{ |
|
"best_metric": 0.13407668471336365, |
|
"best_model_checkpoint": "/data/wheld3/mt5-small-pointer-adv-mtop/checkpoint-3000", |
|
"epoch": 16.304012214776485, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 2.1628, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_exact_match": 0.0022371364653243847, |
|
"eval_loss": 0.7204959988594055, |
|
"eval_runtime": 75.5591, |
|
"eval_samples_per_second": 29.58, |
|
"eval_steps_per_second": 3.706, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 1.1208, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_exact_match": 0.0013422818791946308, |
|
"eval_loss": 0.63932865858078, |
|
"eval_runtime": 78.5979, |
|
"eval_samples_per_second": 28.436, |
|
"eval_steps_per_second": 3.562, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0008, |
|
"loss": 0.8675, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_exact_match": 0.0026845637583892616, |
|
"eval_loss": 0.5905107259750366, |
|
"eval_runtime": 71.821, |
|
"eval_samples_per_second": 31.119, |
|
"eval_steps_per_second": 3.899, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 1.8729, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_exact_match": 0.003131991051454139, |
|
"eval_loss": 0.5726307034492493, |
|
"eval_runtime": 78.7696, |
|
"eval_samples_per_second": 28.374, |
|
"eval_steps_per_second": 3.555, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 3.5417, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"eval_exact_match": 0.006711409395973154, |
|
"eval_loss": 0.5370941758155823, |
|
"eval_runtime": 81.2185, |
|
"eval_samples_per_second": 27.518, |
|
"eval_steps_per_second": 3.447, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.0006, |
|
"loss": 0.9087, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_exact_match": 0.11185682326621924, |
|
"eval_loss": 0.3511998653411865, |
|
"eval_runtime": 82.0065, |
|
"eval_samples_per_second": 27.254, |
|
"eval_steps_per_second": 3.414, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.0005333333333333334, |
|
"loss": 1.2224, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_exact_match": 0.19105145413870245, |
|
"eval_loss": 0.27385014295578003, |
|
"eval_runtime": 82.7127, |
|
"eval_samples_per_second": 27.021, |
|
"eval_steps_per_second": 3.385, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 0.7597, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_exact_match": 0.30156599552572705, |
|
"eval_loss": 0.21514081954956055, |
|
"eval_runtime": 82.6161, |
|
"eval_samples_per_second": 27.053, |
|
"eval_steps_per_second": 3.389, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.0004, |
|
"loss": 0.6981, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_exact_match": 0.3749440715883669, |
|
"eval_loss": 0.17362748086452484, |
|
"eval_runtime": 82.0736, |
|
"eval_samples_per_second": 27.232, |
|
"eval_steps_per_second": 3.412, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.4779, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"eval_exact_match": 0.41655480984340043, |
|
"eval_loss": 0.15482261776924133, |
|
"eval_runtime": 82.6077, |
|
"eval_samples_per_second": 27.056, |
|
"eval_steps_per_second": 3.39, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 0.4397, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"eval_exact_match": 0.45100671140939597, |
|
"eval_loss": 0.13771148025989532, |
|
"eval_runtime": 79.9201, |
|
"eval_samples_per_second": 27.965, |
|
"eval_steps_per_second": 3.503, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4101, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_exact_match": 0.4196868008948546, |
|
"eval_loss": 0.14801675081253052, |
|
"eval_runtime": 81.9056, |
|
"eval_samples_per_second": 27.288, |
|
"eval_steps_per_second": 3.419, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.3323, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"eval_exact_match": 0.43982102908277404, |
|
"eval_loss": 0.13956378400325775, |
|
"eval_runtime": 80.3363, |
|
"eval_samples_per_second": 27.821, |
|
"eval_steps_per_second": 3.485, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.2565, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"eval_exact_match": 0.45234899328859063, |
|
"eval_loss": 0.13505251705646515, |
|
"eval_runtime": 80.6724, |
|
"eval_samples_per_second": 27.705, |
|
"eval_steps_per_second": 3.471, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 0.0, |
|
"loss": 0.2108, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"eval_exact_match": 0.4541387024608501, |
|
"eval_loss": 0.13407668471336365, |
|
"eval_runtime": 80.1776, |
|
"eval_samples_per_second": 27.876, |
|
"eval_steps_per_second": 3.492, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"step": 3000, |
|
"total_flos": 3.3508283441823776e+16, |
|
"train_loss": 1.0188030764261882, |
|
"train_runtime": 23687.0417, |
|
"train_samples_per_second": 64.846, |
|
"train_steps_per_second": 0.127 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 17, |
|
"total_flos": 3.3508283441823776e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|