viT5-base-coqe / trainer_state.json
duyvu8373's picture
Upload 12 files
8ad1e95 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 23.25581395348837,
"eval_steps": 500,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 59.1033,
"eval_gen_len": 17.5513,
"eval_loss": 0.1559952348470688,
"eval_meteor": 0.7539,
"eval_runtime": 19.7643,
"eval_samples_per_second": 26.614,
"eval_steps_per_second": 0.86,
"step": 129
},
{
"epoch": 2.0,
"eval_bleu": 65.6424,
"eval_gen_len": 17.6027,
"eval_loss": 0.0991397500038147,
"eval_meteor": 0.8044,
"eval_runtime": 14.846,
"eval_samples_per_second": 35.43,
"eval_steps_per_second": 1.145,
"step": 258
},
{
"epoch": 3.0,
"eval_bleu": 70.6577,
"eval_gen_len": 17.5779,
"eval_loss": 0.06296151131391525,
"eval_meteor": 0.8488,
"eval_runtime": 14.7963,
"eval_samples_per_second": 35.549,
"eval_steps_per_second": 1.149,
"step": 387
},
{
"epoch": 3.88,
"learning_rate": 1.689922480620155e-05,
"loss": 0.3038,
"step": 500
},
{
"epoch": 4.0,
"eval_bleu": 71.6744,
"eval_gen_len": 17.5989,
"eval_loss": 0.04667546600103378,
"eval_meteor": 0.8522,
"eval_runtime": 14.7696,
"eval_samples_per_second": 35.614,
"eval_steps_per_second": 1.151,
"step": 516
},
{
"epoch": 5.0,
"eval_bleu": 72.4991,
"eval_gen_len": 17.6749,
"eval_loss": 0.038296110928058624,
"eval_meteor": 0.8509,
"eval_runtime": 14.8269,
"eval_samples_per_second": 35.476,
"eval_steps_per_second": 1.147,
"step": 645
},
{
"epoch": 6.0,
"eval_bleu": 72.5858,
"eval_gen_len": 17.6464,
"eval_loss": 0.03319519758224487,
"eval_meteor": 0.8548,
"eval_runtime": 14.6593,
"eval_samples_per_second": 35.882,
"eval_steps_per_second": 1.16,
"step": 774
},
{
"epoch": 7.0,
"eval_bleu": 74.3526,
"eval_gen_len": 17.6217,
"eval_loss": 0.023467697203159332,
"eval_meteor": 0.8734,
"eval_runtime": 14.705,
"eval_samples_per_second": 35.77,
"eval_steps_per_second": 1.156,
"step": 903
},
{
"epoch": 7.75,
"learning_rate": 1.3798449612403102e-05,
"loss": 0.0643,
"step": 1000
},
{
"epoch": 8.0,
"eval_bleu": 74.9962,
"eval_gen_len": 17.6141,
"eval_loss": 0.01849055290222168,
"eval_meteor": 0.8793,
"eval_runtime": 15.0305,
"eval_samples_per_second": 34.995,
"eval_steps_per_second": 1.131,
"step": 1032
},
{
"epoch": 9.0,
"eval_bleu": 75.5462,
"eval_gen_len": 17.6027,
"eval_loss": 0.014913694001734257,
"eval_meteor": 0.8862,
"eval_runtime": 14.5903,
"eval_samples_per_second": 36.051,
"eval_steps_per_second": 1.165,
"step": 1161
},
{
"epoch": 10.0,
"eval_bleu": 76.3236,
"eval_gen_len": 17.5798,
"eval_loss": 0.014180008322000504,
"eval_meteor": 0.8954,
"eval_runtime": 14.6398,
"eval_samples_per_second": 35.929,
"eval_steps_per_second": 1.161,
"step": 1290
},
{
"epoch": 11.0,
"eval_bleu": 75.8326,
"eval_gen_len": 17.5951,
"eval_loss": 0.010033702477812767,
"eval_meteor": 0.8888,
"eval_runtime": 14.6128,
"eval_samples_per_second": 35.996,
"eval_steps_per_second": 1.163,
"step": 1419
},
{
"epoch": 11.63,
"learning_rate": 1.0697674418604651e-05,
"loss": 0.0341,
"step": 1500
},
{
"epoch": 12.0,
"eval_bleu": 75.9138,
"eval_gen_len": 17.5951,
"eval_loss": 0.009980925358831882,
"eval_meteor": 0.8891,
"eval_runtime": 14.7328,
"eval_samples_per_second": 35.703,
"eval_steps_per_second": 1.154,
"step": 1548
},
{
"epoch": 13.0,
"eval_bleu": 76.0534,
"eval_gen_len": 17.5913,
"eval_loss": 0.0070331464521586895,
"eval_meteor": 0.8901,
"eval_runtime": 14.643,
"eval_samples_per_second": 35.922,
"eval_steps_per_second": 1.161,
"step": 1677
},
{
"epoch": 14.0,
"eval_bleu": 76.3943,
"eval_gen_len": 17.5798,
"eval_loss": 0.006607058458030224,
"eval_meteor": 0.8952,
"eval_runtime": 14.5594,
"eval_samples_per_second": 36.128,
"eval_steps_per_second": 1.168,
"step": 1806
},
{
"epoch": 15.0,
"eval_bleu": 76.9833,
"eval_gen_len": 17.5608,
"eval_loss": 0.003804780077189207,
"eval_meteor": 0.9027,
"eval_runtime": 14.9867,
"eval_samples_per_second": 35.098,
"eval_steps_per_second": 1.134,
"step": 1935
},
{
"epoch": 15.5,
"learning_rate": 7.596899224806202e-06,
"loss": 0.0191,
"step": 2000
},
{
"epoch": 16.0,
"eval_bleu": 76.9399,
"eval_gen_len": 17.5608,
"eval_loss": 0.0028171560261398554,
"eval_meteor": 0.9025,
"eval_runtime": 14.5931,
"eval_samples_per_second": 36.044,
"eval_steps_per_second": 1.165,
"step": 2064
},
{
"epoch": 17.0,
"eval_bleu": 76.5796,
"eval_gen_len": 17.5722,
"eval_loss": 0.005369492340832949,
"eval_meteor": 0.8979,
"eval_runtime": 14.6939,
"eval_samples_per_second": 35.797,
"eval_steps_per_second": 1.157,
"step": 2193
},
{
"epoch": 18.0,
"eval_bleu": 77.0507,
"eval_gen_len": 17.557,
"eval_loss": 0.002158859744668007,
"eval_meteor": 0.904,
"eval_runtime": 14.6859,
"eval_samples_per_second": 35.817,
"eval_steps_per_second": 1.158,
"step": 2322
},
{
"epoch": 19.0,
"eval_bleu": 76.3097,
"eval_gen_len": 17.5837,
"eval_loss": 0.0028479481115937233,
"eval_meteor": 0.8933,
"eval_runtime": 14.699,
"eval_samples_per_second": 35.785,
"eval_steps_per_second": 1.157,
"step": 2451
},
{
"epoch": 19.38,
"learning_rate": 4.4961240310077525e-06,
"loss": 0.0121,
"step": 2500
},
{
"epoch": 20.0,
"eval_bleu": 77.0507,
"eval_gen_len": 17.557,
"eval_loss": 0.0012633432634174824,
"eval_meteor": 0.904,
"eval_runtime": 14.9177,
"eval_samples_per_second": 35.26,
"eval_steps_per_second": 1.14,
"step": 2580
},
{
"epoch": 21.0,
"eval_bleu": 76.5168,
"eval_gen_len": 17.576,
"eval_loss": 0.001905079698190093,
"eval_meteor": 0.8965,
"eval_runtime": 14.6207,
"eval_samples_per_second": 35.976,
"eval_steps_per_second": 1.163,
"step": 2709
},
{
"epoch": 22.0,
"eval_bleu": 77.2739,
"eval_gen_len": 17.5494,
"eval_loss": 0.0008121016435325146,
"eval_meteor": 0.9072,
"eval_runtime": 14.6135,
"eval_samples_per_second": 35.994,
"eval_steps_per_second": 1.163,
"step": 2838
},
{
"epoch": 23.0,
"eval_bleu": 77.1609,
"eval_gen_len": 17.5532,
"eval_loss": 0.0007495949394069612,
"eval_meteor": 0.9056,
"eval_runtime": 14.5508,
"eval_samples_per_second": 36.149,
"eval_steps_per_second": 1.168,
"step": 2967
},
{
"epoch": 23.26,
"learning_rate": 1.3953488372093025e-06,
"loss": 0.0083,
"step": 3000
}
],
"logging_steps": 500,
"max_steps": 3225,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"total_flos": 1.172703512236032e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}