|
{ |
|
"best_metric": 0.030559765174984932, |
|
"best_model_checkpoint": "/data/wheld3/mt5-small-pointer-top_v2/checkpoint-3000", |
|
"epoch": 12.345168539325842, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 1.9316, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_exact_match": 0.008391608391608392, |
|
"eval_loss": 0.45658260583877563, |
|
"eval_runtime": 661.3564, |
|
"eval_samples_per_second": 25.947, |
|
"eval_steps_per_second": 3.243, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 0.3713, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_exact_match": 0.12301864801864802, |
|
"eval_loss": 0.14726108312606812, |
|
"eval_runtime": 655.0457, |
|
"eval_samples_per_second": 26.197, |
|
"eval_steps_per_second": 3.275, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0008, |
|
"loss": 0.1747, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_exact_match": 0.19842657342657344, |
|
"eval_loss": 0.07877045124769211, |
|
"eval_runtime": 646.2259, |
|
"eval_samples_per_second": 26.554, |
|
"eval_steps_per_second": 3.319, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 0.1104, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_exact_match": 0.21486013986013985, |
|
"eval_loss": 0.056847672909498215, |
|
"eval_runtime": 648.473, |
|
"eval_samples_per_second": 26.462, |
|
"eval_steps_per_second": 3.308, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.0842, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_exact_match": 0.22173659673659674, |
|
"eval_loss": 0.04728136211633682, |
|
"eval_runtime": 656.2209, |
|
"eval_samples_per_second": 26.15, |
|
"eval_steps_per_second": 3.269, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0006, |
|
"loss": 0.0694, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_exact_match": 0.225990675990676, |
|
"eval_loss": 0.0425742082297802, |
|
"eval_runtime": 650.7447, |
|
"eval_samples_per_second": 26.37, |
|
"eval_steps_per_second": 3.296, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0005333333333333334, |
|
"loss": 0.0603, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_exact_match": 0.2279137529137529, |
|
"eval_loss": 0.03834143653512001, |
|
"eval_runtime": 652.2239, |
|
"eval_samples_per_second": 26.31, |
|
"eval_steps_per_second": 3.289, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 0.0534, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"eval_exact_match": 0.22808857808857808, |
|
"eval_loss": 0.03673423081636429, |
|
"eval_runtime": 652.7536, |
|
"eval_samples_per_second": 26.289, |
|
"eval_steps_per_second": 3.286, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.0004, |
|
"loss": 0.0477, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_exact_match": 0.23006993006993007, |
|
"eval_loss": 0.03471648693084717, |
|
"eval_runtime": 664.2219, |
|
"eval_samples_per_second": 25.835, |
|
"eval_steps_per_second": 3.229, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.0441, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"eval_exact_match": 0.23135198135198135, |
|
"eval_loss": 0.03336101025342941, |
|
"eval_runtime": 672.8476, |
|
"eval_samples_per_second": 25.504, |
|
"eval_steps_per_second": 3.188, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 0.0413, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"eval_exact_match": 0.23146853146853147, |
|
"eval_loss": 0.03233984857797623, |
|
"eval_runtime": 660.8292, |
|
"eval_samples_per_second": 25.967, |
|
"eval_steps_per_second": 3.246, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.0387, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_exact_match": 0.2315850815850816, |
|
"eval_loss": 0.03159063309431076, |
|
"eval_runtime": 652.4785, |
|
"eval_samples_per_second": 26.3, |
|
"eval_steps_per_second": 3.287, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.0366, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"eval_exact_match": 0.2324009324009324, |
|
"eval_loss": 0.03113115206360817, |
|
"eval_runtime": 657.1693, |
|
"eval_samples_per_second": 26.112, |
|
"eval_steps_per_second": 3.264, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0358, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_exact_match": 0.2324009324009324, |
|
"eval_loss": 0.03069169819355011, |
|
"eval_runtime": 652.5597, |
|
"eval_samples_per_second": 26.296, |
|
"eval_steps_per_second": 3.287, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 0.0, |
|
"loss": 0.0343, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"eval_exact_match": 0.2326923076923077, |
|
"eval_loss": 0.030559765174984932, |
|
"eval_runtime": 648.4066, |
|
"eval_samples_per_second": 26.465, |
|
"eval_steps_per_second": 3.308, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"step": 3000, |
|
"total_flos": 3.654446752585728e+16, |
|
"train_loss": 0.2089100898106893, |
|
"train_runtime": 30994.3996, |
|
"train_samples_per_second": 49.557, |
|
"train_steps_per_second": 0.097 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 13, |
|
"total_flos": 3.654446752585728e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|