|
{ |
|
"best_metric": 0.11308582127094269, |
|
"best_model_checkpoint": "/data/wheld3/mt5-base-pointer-mtop/checkpoint-1200", |
|
"epoch": 99.9800918836141, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 1.7749, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_exact_match": 0.003131991051454139, |
|
"eval_loss": 0.5892038345336914, |
|
"eval_runtime": 135.5401, |
|
"eval_samples_per_second": 16.49, |
|
"eval_steps_per_second": 2.066, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 0.6021, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_exact_match": 0.013870246085011185, |
|
"eval_loss": 0.5159956216812134, |
|
"eval_runtime": 130.613, |
|
"eval_samples_per_second": 17.112, |
|
"eval_steps_per_second": 2.144, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"learning_rate": 0.0008, |
|
"loss": 0.6044, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"eval_exact_match": 0.053243847874720356, |
|
"eval_loss": 0.40801870822906494, |
|
"eval_runtime": 132.1423, |
|
"eval_samples_per_second": 16.914, |
|
"eval_steps_per_second": 2.119, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 26.65, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 0.3302, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 26.65, |
|
"eval_exact_match": 0.36196868008948546, |
|
"eval_loss": 0.18654391169548035, |
|
"eval_runtime": 134.276, |
|
"eval_samples_per_second": 16.645, |
|
"eval_steps_per_second": 2.085, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.1483, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_exact_match": 0.5105145413870246, |
|
"eval_loss": 0.1267053484916687, |
|
"eval_runtime": 133.0912, |
|
"eval_samples_per_second": 16.793, |
|
"eval_steps_per_second": 2.104, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 39.98, |
|
"learning_rate": 0.0006, |
|
"loss": 0.0768, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 39.98, |
|
"eval_exact_match": 0.5297539149888143, |
|
"eval_loss": 0.11308582127094269, |
|
"eval_runtime": 139.8147, |
|
"eval_samples_per_second": 15.985, |
|
"eval_steps_per_second": 2.003, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 46.65, |
|
"learning_rate": 0.0005333333333333334, |
|
"loss": 0.0525, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 46.65, |
|
"eval_exact_match": 0.5413870246085011, |
|
"eval_loss": 0.12185565382242203, |
|
"eval_runtime": 132.0637, |
|
"eval_samples_per_second": 16.924, |
|
"eval_steps_per_second": 2.12, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 0.0801, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"eval_exact_match": 0.5275167785234899, |
|
"eval_loss": 0.11860152333974838, |
|
"eval_runtime": 132.256, |
|
"eval_samples_per_second": 16.899, |
|
"eval_steps_per_second": 2.117, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 59.98, |
|
"learning_rate": 0.0004, |
|
"loss": 0.0331, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 59.98, |
|
"eval_exact_match": 0.5422818791946309, |
|
"eval_loss": 0.13056021928787231, |
|
"eval_runtime": 132.9523, |
|
"eval_samples_per_second": 16.811, |
|
"eval_steps_per_second": 2.106, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 66.65, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.0254, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 66.65, |
|
"eval_exact_match": 0.5395973154362416, |
|
"eval_loss": 0.13960428535938263, |
|
"eval_runtime": 135.7759, |
|
"eval_samples_per_second": 16.461, |
|
"eval_steps_per_second": 2.062, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 73.33, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 0.0168, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 73.33, |
|
"eval_exact_match": 0.5436241610738255, |
|
"eval_loss": 0.15595464408397675, |
|
"eval_runtime": 134.0818, |
|
"eval_samples_per_second": 16.669, |
|
"eval_steps_per_second": 2.088, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 79.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.0129, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 79.98, |
|
"eval_exact_match": 0.5494407158836689, |
|
"eval_loss": 0.16592496633529663, |
|
"eval_runtime": 133.0433, |
|
"eval_samples_per_second": 16.799, |
|
"eval_steps_per_second": 2.105, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 86.65, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.0105, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 86.65, |
|
"eval_exact_match": 0.5422818791946309, |
|
"eval_loss": 0.16985595226287842, |
|
"eval_runtime": 137.112, |
|
"eval_samples_per_second": 16.301, |
|
"eval_steps_per_second": 2.042, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0088, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"eval_exact_match": 0.5472035794183445, |
|
"eval_loss": 0.17423103749752045, |
|
"eval_runtime": 132.2768, |
|
"eval_samples_per_second": 16.896, |
|
"eval_steps_per_second": 2.117, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 99.98, |
|
"learning_rate": 0.0, |
|
"loss": 0.0077, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 99.98, |
|
"eval_exact_match": 0.5467561521252796, |
|
"eval_loss": 0.17754317820072174, |
|
"eval_runtime": 132.953, |
|
"eval_samples_per_second": 16.81, |
|
"eval_steps_per_second": 2.106, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 99.98, |
|
"step": 3000, |
|
"total_flos": 7.72413437221586e+16, |
|
"train_loss": 0.2523062037229538, |
|
"train_runtime": 33274.6395, |
|
"train_samples_per_second": 46.161, |
|
"train_steps_per_second": 0.09 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 100, |
|
"total_flos": 7.72413437221586e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|