table_qa_1 / last-checkpoint /trainer_state.json
daydrill's picture
Training in progress, step 30000
7059e46
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9773260359655981,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"learning_rate": 2.8373729476153247e-05,
"loss": 1.6126,
"step": 5000
},
{
"epoch": 0.16,
"eval_exact_match": 68.15336463223787,
"eval_f1": 73.98256617857639,
"eval_loss": 1.189887523651123,
"eval_runtime": 1542.7266,
"eval_samples_per_second": 7.463,
"eval_steps_per_second": 3.732,
"step": 5000
},
{
"epoch": 0.33,
"learning_rate": 2.674550430023456e-05,
"loss": 1.1965,
"step": 10000
},
{
"epoch": 0.33,
"eval_exact_match": 75.13475917231786,
"eval_f1": 79.95184555682803,
"eval_loss": 0.9056070446968079,
"eval_runtime": 1533.9546,
"eval_samples_per_second": 7.505,
"eval_steps_per_second": 3.753,
"step": 10000
},
{
"epoch": 0.49,
"learning_rate": 2.511695334897055e-05,
"loss": 1.0769,
"step": 15000
},
{
"epoch": 0.49,
"eval_exact_match": 77.56042427403929,
"eval_f1": 82.22503770504048,
"eval_loss": 0.9495312571525574,
"eval_runtime": 1533.818,
"eval_samples_per_second": 7.506,
"eval_steps_per_second": 3.753,
"step": 15000
},
{
"epoch": 0.65,
"learning_rate": 2.3488728173051864e-05,
"loss": 0.9992,
"step": 20000
},
{
"epoch": 0.65,
"eval_exact_match": 81.09024517475221,
"eval_f1": 85.49992934663945,
"eval_loss": 0.8323877453804016,
"eval_runtime": 1541.3611,
"eval_samples_per_second": 7.469,
"eval_steps_per_second": 3.735,
"step": 20000
},
{
"epoch": 0.81,
"learning_rate": 2.1860502997133176e-05,
"loss": 0.9329,
"step": 25000
},
{
"epoch": 0.81,
"eval_exact_match": 81.264127977743,
"eval_f1": 85.43481659242194,
"eval_loss": 0.7231199741363525,
"eval_runtime": 1535.496,
"eval_samples_per_second": 7.498,
"eval_steps_per_second": 3.749,
"step": 25000
},
{
"epoch": 0.98,
"learning_rate": 2.0232277821214492e-05,
"loss": 0.8913,
"step": 30000
},
{
"epoch": 0.98,
"eval_exact_match": 81.92488262910798,
"eval_f1": 85.96065424027016,
"eval_loss": 0.7569286227226257,
"eval_runtime": 1529.0019,
"eval_samples_per_second": 7.53,
"eval_steps_per_second": 3.765,
"step": 30000
}
],
"max_steps": 92088,
"num_train_epochs": 3,
"total_flos": 6.619410874368e+16,
"trial_name": null,
"trial_params": null
}