|
{ |
|
"best_metric": 0.12814190983772278, |
|
"best_model_checkpoint": "/data/wheld3/mt5-base-pointer-adv-mtop/checkpoint-1000", |
|
"epoch": 16.304012214776485, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 1.7704, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_exact_match": 0.13154362416107382, |
|
"eval_loss": 0.3664160668849945, |
|
"eval_runtime": 115.2843, |
|
"eval_samples_per_second": 19.387, |
|
"eval_steps_per_second": 2.429, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 1.9751, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_exact_match": 0.3400447427293065, |
|
"eval_loss": 0.20914442837238312, |
|
"eval_runtime": 116.9108, |
|
"eval_samples_per_second": 19.117, |
|
"eval_steps_per_second": 2.395, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0008, |
|
"loss": 1.0019, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_exact_match": 0.45861297539149887, |
|
"eval_loss": 0.14529764652252197, |
|
"eval_runtime": 115.0721, |
|
"eval_samples_per_second": 19.423, |
|
"eval_steps_per_second": 2.433, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 1.313, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_exact_match": 0.5064876957494407, |
|
"eval_loss": 0.13125699758529663, |
|
"eval_runtime": 116.3825, |
|
"eval_samples_per_second": 19.204, |
|
"eval_steps_per_second": 2.406, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.6593, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"eval_exact_match": 0.5266219239373602, |
|
"eval_loss": 0.12814190983772278, |
|
"eval_runtime": 116.3912, |
|
"eval_samples_per_second": 19.202, |
|
"eval_steps_per_second": 2.406, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.0006, |
|
"loss": 0.3216, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_exact_match": 0.5252796420581656, |
|
"eval_loss": 0.13165239989757538, |
|
"eval_runtime": 116.4297, |
|
"eval_samples_per_second": 19.196, |
|
"eval_steps_per_second": 2.405, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.0005333333333333334, |
|
"loss": 0.4614, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_exact_match": 0.5261744966442953, |
|
"eval_loss": 0.1507694572210312, |
|
"eval_runtime": 116.8619, |
|
"eval_samples_per_second": 19.125, |
|
"eval_steps_per_second": 2.396, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 0.3577, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_exact_match": 0.5360178970917227, |
|
"eval_loss": 0.1421622931957245, |
|
"eval_runtime": 121.8344, |
|
"eval_samples_per_second": 18.345, |
|
"eval_steps_per_second": 2.298, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.0004, |
|
"loss": 0.3748, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_exact_match": 0.5458612975391499, |
|
"eval_loss": 0.14189742505550385, |
|
"eval_runtime": 119.5737, |
|
"eval_samples_per_second": 18.691, |
|
"eval_steps_per_second": 2.342, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.2422, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"eval_exact_match": 0.5355704697986577, |
|
"eval_loss": 0.16032171249389648, |
|
"eval_runtime": 125.8745, |
|
"eval_samples_per_second": 17.756, |
|
"eval_steps_per_second": 2.224, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 0.4443, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"eval_exact_match": 0.5472035794183445, |
|
"eval_loss": 0.15260477364063263, |
|
"eval_runtime": 118.267, |
|
"eval_samples_per_second": 18.898, |
|
"eval_steps_per_second": 2.368, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2671, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_exact_match": 0.5480984340044742, |
|
"eval_loss": 0.16060054302215576, |
|
"eval_runtime": 118.1286, |
|
"eval_samples_per_second": 18.92, |
|
"eval_steps_per_second": 2.37, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.227, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"eval_exact_match": 0.5440715883668904, |
|
"eval_loss": 0.1774316281080246, |
|
"eval_runtime": 119.688, |
|
"eval_samples_per_second": 18.674, |
|
"eval_steps_per_second": 2.339, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.2053, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"eval_exact_match": 0.5440715883668904, |
|
"eval_loss": 0.1752384901046753, |
|
"eval_runtime": 119.0856, |
|
"eval_samples_per_second": 18.768, |
|
"eval_steps_per_second": 2.351, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 0.0, |
|
"loss": 0.1517, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"eval_exact_match": 0.5480984340044742, |
|
"eval_loss": 0.1770186424255371, |
|
"eval_runtime": 119.4908, |
|
"eval_samples_per_second": 18.704, |
|
"eval_steps_per_second": 2.343, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"step": 3000, |
|
"total_flos": 7.59685289063286e+16, |
|
"train_loss": 0.6515167045593262, |
|
"train_runtime": 35508.9264, |
|
"train_samples_per_second": 43.257, |
|
"train_steps_per_second": 0.084 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 17, |
|
"total_flos": 7.59685289063286e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|