|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 23.25581395348837, |
|
"eval_steps": 500, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 59.1033, |
|
"eval_gen_len": 17.5513, |
|
"eval_loss": 0.1559952348470688, |
|
"eval_meteor": 0.7539, |
|
"eval_runtime": 19.7643, |
|
"eval_samples_per_second": 26.614, |
|
"eval_steps_per_second": 0.86, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 65.6424, |
|
"eval_gen_len": 17.6027, |
|
"eval_loss": 0.0991397500038147, |
|
"eval_meteor": 0.8044, |
|
"eval_runtime": 14.846, |
|
"eval_samples_per_second": 35.43, |
|
"eval_steps_per_second": 1.145, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 70.6577, |
|
"eval_gen_len": 17.5779, |
|
"eval_loss": 0.06296151131391525, |
|
"eval_meteor": 0.8488, |
|
"eval_runtime": 14.7963, |
|
"eval_samples_per_second": 35.549, |
|
"eval_steps_per_second": 1.149, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 1.689922480620155e-05, |
|
"loss": 0.3038, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 71.6744, |
|
"eval_gen_len": 17.5989, |
|
"eval_loss": 0.04667546600103378, |
|
"eval_meteor": 0.8522, |
|
"eval_runtime": 14.7696, |
|
"eval_samples_per_second": 35.614, |
|
"eval_steps_per_second": 1.151, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 72.4991, |
|
"eval_gen_len": 17.6749, |
|
"eval_loss": 0.038296110928058624, |
|
"eval_meteor": 0.8509, |
|
"eval_runtime": 14.8269, |
|
"eval_samples_per_second": 35.476, |
|
"eval_steps_per_second": 1.147, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 72.5858, |
|
"eval_gen_len": 17.6464, |
|
"eval_loss": 0.03319519758224487, |
|
"eval_meteor": 0.8548, |
|
"eval_runtime": 14.6593, |
|
"eval_samples_per_second": 35.882, |
|
"eval_steps_per_second": 1.16, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 74.3526, |
|
"eval_gen_len": 17.6217, |
|
"eval_loss": 0.023467697203159332, |
|
"eval_meteor": 0.8734, |
|
"eval_runtime": 14.705, |
|
"eval_samples_per_second": 35.77, |
|
"eval_steps_per_second": 1.156, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 1.3798449612403102e-05, |
|
"loss": 0.0643, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 74.9962, |
|
"eval_gen_len": 17.6141, |
|
"eval_loss": 0.01849055290222168, |
|
"eval_meteor": 0.8793, |
|
"eval_runtime": 15.0305, |
|
"eval_samples_per_second": 34.995, |
|
"eval_steps_per_second": 1.131, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 75.5462, |
|
"eval_gen_len": 17.6027, |
|
"eval_loss": 0.014913694001734257, |
|
"eval_meteor": 0.8862, |
|
"eval_runtime": 14.5903, |
|
"eval_samples_per_second": 36.051, |
|
"eval_steps_per_second": 1.165, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 76.3236, |
|
"eval_gen_len": 17.5798, |
|
"eval_loss": 0.014180008322000504, |
|
"eval_meteor": 0.8954, |
|
"eval_runtime": 14.6398, |
|
"eval_samples_per_second": 35.929, |
|
"eval_steps_per_second": 1.161, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 75.8326, |
|
"eval_gen_len": 17.5951, |
|
"eval_loss": 0.010033702477812767, |
|
"eval_meteor": 0.8888, |
|
"eval_runtime": 14.6128, |
|
"eval_samples_per_second": 35.996, |
|
"eval_steps_per_second": 1.163, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 1.0697674418604651e-05, |
|
"loss": 0.0341, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 75.9138, |
|
"eval_gen_len": 17.5951, |
|
"eval_loss": 0.009980925358831882, |
|
"eval_meteor": 0.8891, |
|
"eval_runtime": 14.7328, |
|
"eval_samples_per_second": 35.703, |
|
"eval_steps_per_second": 1.154, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 76.0534, |
|
"eval_gen_len": 17.5913, |
|
"eval_loss": 0.0070331464521586895, |
|
"eval_meteor": 0.8901, |
|
"eval_runtime": 14.643, |
|
"eval_samples_per_second": 35.922, |
|
"eval_steps_per_second": 1.161, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 76.3943, |
|
"eval_gen_len": 17.5798, |
|
"eval_loss": 0.006607058458030224, |
|
"eval_meteor": 0.8952, |
|
"eval_runtime": 14.5594, |
|
"eval_samples_per_second": 36.128, |
|
"eval_steps_per_second": 1.168, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 76.9833, |
|
"eval_gen_len": 17.5608, |
|
"eval_loss": 0.003804780077189207, |
|
"eval_meteor": 0.9027, |
|
"eval_runtime": 14.9867, |
|
"eval_samples_per_second": 35.098, |
|
"eval_steps_per_second": 1.134, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 7.596899224806202e-06, |
|
"loss": 0.0191, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 76.9399, |
|
"eval_gen_len": 17.5608, |
|
"eval_loss": 0.0028171560261398554, |
|
"eval_meteor": 0.9025, |
|
"eval_runtime": 14.5931, |
|
"eval_samples_per_second": 36.044, |
|
"eval_steps_per_second": 1.165, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 76.5796, |
|
"eval_gen_len": 17.5722, |
|
"eval_loss": 0.005369492340832949, |
|
"eval_meteor": 0.8979, |
|
"eval_runtime": 14.6939, |
|
"eval_samples_per_second": 35.797, |
|
"eval_steps_per_second": 1.157, |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 77.0507, |
|
"eval_gen_len": 17.557, |
|
"eval_loss": 0.002158859744668007, |
|
"eval_meteor": 0.904, |
|
"eval_runtime": 14.6859, |
|
"eval_samples_per_second": 35.817, |
|
"eval_steps_per_second": 1.158, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 76.3097, |
|
"eval_gen_len": 17.5837, |
|
"eval_loss": 0.0028479481115937233, |
|
"eval_meteor": 0.8933, |
|
"eval_runtime": 14.699, |
|
"eval_samples_per_second": 35.785, |
|
"eval_steps_per_second": 1.157, |
|
"step": 2451 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"learning_rate": 4.4961240310077525e-06, |
|
"loss": 0.0121, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 77.0507, |
|
"eval_gen_len": 17.557, |
|
"eval_loss": 0.0012633432634174824, |
|
"eval_meteor": 0.904, |
|
"eval_runtime": 14.9177, |
|
"eval_samples_per_second": 35.26, |
|
"eval_steps_per_second": 1.14, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bleu": 76.5168, |
|
"eval_gen_len": 17.576, |
|
"eval_loss": 0.001905079698190093, |
|
"eval_meteor": 0.8965, |
|
"eval_runtime": 14.6207, |
|
"eval_samples_per_second": 35.976, |
|
"eval_steps_per_second": 1.163, |
|
"step": 2709 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bleu": 77.2739, |
|
"eval_gen_len": 17.5494, |
|
"eval_loss": 0.0008121016435325146, |
|
"eval_meteor": 0.9072, |
|
"eval_runtime": 14.6135, |
|
"eval_samples_per_second": 35.994, |
|
"eval_steps_per_second": 1.163, |
|
"step": 2838 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bleu": 77.1609, |
|
"eval_gen_len": 17.5532, |
|
"eval_loss": 0.0007495949394069612, |
|
"eval_meteor": 0.9056, |
|
"eval_runtime": 14.5508, |
|
"eval_samples_per_second": 36.149, |
|
"eval_steps_per_second": 1.168, |
|
"step": 2967 |
|
}, |
|
{ |
|
"epoch": 23.26, |
|
"learning_rate": 1.3953488372093025e-06, |
|
"loss": 0.0083, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3225, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"total_flos": 1.172703512236032e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|