tyzhu's picture
Training in progress, epoch 7, checkpoint
2f68291 verified
raw
history blame
6.84 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 3703,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"learning_rate": 5e-05,
"loss": 1.6355,
"step": 100
},
{
"epoch": 0.38,
"learning_rate": 5e-05,
"loss": 1.4037,
"step": 200
},
{
"epoch": 0.57,
"learning_rate": 5e-05,
"loss": 1.4183,
"step": 300
},
{
"epoch": 0.76,
"learning_rate": 5e-05,
"loss": 1.3881,
"step": 400
},
{
"epoch": 0.95,
"learning_rate": 5e-05,
"loss": 1.3891,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.6137948717948718,
"eval_loss": 1.3015451431274414,
"eval_runtime": 4.1821,
"eval_samples_per_second": 119.557,
"eval_steps_per_second": 15.064,
"step": 529
},
{
"epoch": 1.0,
"eval_exact_match": 23.2,
"eval_f1": 35.17014759120022,
"step": 529
},
{
"epoch": 1.13,
"learning_rate": 5e-05,
"loss": 1.3808,
"step": 600
},
{
"epoch": 1.32,
"learning_rate": 5e-05,
"loss": 1.348,
"step": 700
},
{
"epoch": 1.51,
"learning_rate": 5e-05,
"loss": 1.3627,
"step": 800
},
{
"epoch": 1.7,
"learning_rate": 5e-05,
"loss": 1.3473,
"step": 900
},
{
"epoch": 1.89,
"learning_rate": 5e-05,
"loss": 1.3633,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.6165641025641025,
"eval_loss": 1.2854759693145752,
"eval_runtime": 4.1064,
"eval_samples_per_second": 121.761,
"eval_steps_per_second": 15.342,
"step": 1058
},
{
"epoch": 2.0,
"eval_exact_match": 25.8,
"eval_f1": 37.509047619047635,
"step": 1058
},
{
"epoch": 2.08,
"learning_rate": 5e-05,
"loss": 1.3358,
"step": 1100
},
{
"epoch": 2.27,
"learning_rate": 5e-05,
"loss": 1.2917,
"step": 1200
},
{
"epoch": 2.46,
"learning_rate": 5e-05,
"loss": 1.2948,
"step": 1300
},
{
"epoch": 2.65,
"learning_rate": 5e-05,
"loss": 1.3144,
"step": 1400
},
{
"epoch": 2.84,
"learning_rate": 5e-05,
"loss": 1.2929,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.6176923076923077,
"eval_loss": 1.2953603267669678,
"eval_runtime": 4.0992,
"eval_samples_per_second": 121.974,
"eval_steps_per_second": 15.369,
"step": 1587
},
{
"epoch": 3.0,
"eval_exact_match": 25.0,
"eval_f1": 37.08253968253969,
"step": 1587
},
{
"epoch": 3.02,
"learning_rate": 5e-05,
"loss": 1.2906,
"step": 1600
},
{
"epoch": 3.21,
"learning_rate": 5e-05,
"loss": 1.2216,
"step": 1700
},
{
"epoch": 3.4,
"learning_rate": 5e-05,
"loss": 1.2459,
"step": 1800
},
{
"epoch": 3.59,
"learning_rate": 5e-05,
"loss": 1.2474,
"step": 1900
},
{
"epoch": 3.78,
"learning_rate": 5e-05,
"loss": 1.2518,
"step": 2000
},
{
"epoch": 3.97,
"learning_rate": 5e-05,
"loss": 1.2361,
"step": 2100
},
{
"epoch": 4.0,
"eval_accuracy": 0.6045128205128205,
"eval_loss": 1.3488503694534302,
"eval_runtime": 4.2015,
"eval_samples_per_second": 119.004,
"eval_steps_per_second": 14.995,
"step": 2116
},
{
"epoch": 4.0,
"eval_exact_match": 25.0,
"eval_f1": 36.82126984126986,
"step": 2116
},
{
"epoch": 4.16,
"learning_rate": 5e-05,
"loss": 1.1856,
"step": 2200
},
{
"epoch": 4.35,
"learning_rate": 5e-05,
"loss": 1.1556,
"step": 2300
},
{
"epoch": 4.54,
"learning_rate": 5e-05,
"loss": 1.1828,
"step": 2400
},
{
"epoch": 4.73,
"learning_rate": 5e-05,
"loss": 1.1687,
"step": 2500
},
{
"epoch": 4.91,
"learning_rate": 5e-05,
"loss": 1.1856,
"step": 2600
},
{
"epoch": 5.0,
"eval_accuracy": 0.6125128205128205,
"eval_loss": 1.3968186378479004,
"eval_runtime": 4.3081,
"eval_samples_per_second": 116.06,
"eval_steps_per_second": 14.624,
"step": 2645
},
{
"epoch": 5.0,
"eval_exact_match": 24.6,
"eval_f1": 37.079523809523806,
"step": 2645
},
{
"epoch": 5.1,
"learning_rate": 5e-05,
"loss": 1.1376,
"step": 2700
},
{
"epoch": 5.29,
"learning_rate": 5e-05,
"loss": 1.1079,
"step": 2800
},
{
"epoch": 5.48,
"learning_rate": 5e-05,
"loss": 1.0954,
"step": 2900
},
{
"epoch": 5.67,
"learning_rate": 5e-05,
"loss": 1.1317,
"step": 3000
},
{
"epoch": 5.86,
"learning_rate": 5e-05,
"loss": 1.1098,
"step": 3100
},
{
"epoch": 6.0,
"eval_accuracy": 0.6114871794871795,
"eval_loss": 1.4720605611801147,
"eval_runtime": 4.5,
"eval_samples_per_second": 111.11,
"eval_steps_per_second": 14.0,
"step": 3174
},
{
"epoch": 6.0,
"eval_exact_match": 24.4,
"eval_f1": 35.78015873015873,
"step": 3174
},
{
"epoch": 6.05,
"learning_rate": 5e-05,
"loss": 1.0945,
"step": 3200
},
{
"epoch": 6.24,
"learning_rate": 5e-05,
"loss": 1.0309,
"step": 3300
},
{
"epoch": 6.43,
"learning_rate": 5e-05,
"loss": 1.0389,
"step": 3400
},
{
"epoch": 6.62,
"learning_rate": 5e-05,
"loss": 1.0547,
"step": 3500
},
{
"epoch": 6.81,
"learning_rate": 5e-05,
"loss": 1.0618,
"step": 3600
},
{
"epoch": 6.99,
"learning_rate": 5e-05,
"loss": 1.0753,
"step": 3700
},
{
"epoch": 7.0,
"eval_accuracy": 0.6076410256410256,
"eval_loss": 1.5797967910766602,
"eval_runtime": 4.8128,
"eval_samples_per_second": 103.891,
"eval_steps_per_second": 13.09,
"step": 3703
},
{
"epoch": 7.0,
"eval_exact_match": 21.8,
"eval_f1": 33.15111111111111,
"step": 3703
}
],
"logging_steps": 100,
"max_steps": 26450,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 6.255076307642614e+17,
"trial_name": null,
"trial_params": null
}