|
{ |
|
"best_metric": 2.5794920921325684, |
|
"best_model_checkpoint": "/kaggle/working/best/checkpoint-950", |
|
"epoch": 2.6095654892153797, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"eval_BLEU_ach": 4.4065, |
|
"eval_BLEU_lgg": 1.7478, |
|
"eval_BLEU_lug": 19.3906, |
|
"eval_BLEU_mean": 6.3725, |
|
"eval_BLEU_nyn": 5.6412, |
|
"eval_BLEU_teo": 0.6765, |
|
"eval_loss": 3.7197346687316895, |
|
"eval_runtime": 81.4138, |
|
"eval_samples_per_second": 30.707, |
|
"eval_steps_per_second": 0.614, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_BLEU_ach": 10.4487, |
|
"eval_BLEU_lgg": 6.9863, |
|
"eval_BLEU_lug": 22.1636, |
|
"eval_BLEU_mean": 11.2797, |
|
"eval_BLEU_nyn": 9.4768, |
|
"eval_BLEU_teo": 7.3231, |
|
"eval_loss": 3.190028429031372, |
|
"eval_runtime": 80.5044, |
|
"eval_samples_per_second": 31.054, |
|
"eval_steps_per_second": 0.621, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_BLEU_ach": 12.7721, |
|
"eval_BLEU_lgg": 10.1002, |
|
"eval_BLEU_lug": 23.7283, |
|
"eval_BLEU_mean": 13.7176, |
|
"eval_BLEU_nyn": 11.3816, |
|
"eval_BLEU_teo": 10.606, |
|
"eval_loss": 2.9923548698425293, |
|
"eval_runtime": 80.2426, |
|
"eval_samples_per_second": 31.156, |
|
"eval_steps_per_second": 0.623, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_BLEU_ach": 14.146, |
|
"eval_BLEU_lgg": 13.022, |
|
"eval_BLEU_lug": 24.6207, |
|
"eval_BLEU_mean": 15.4215, |
|
"eval_BLEU_nyn": 11.8514, |
|
"eval_BLEU_teo": 13.4677, |
|
"eval_loss": 2.884697437286377, |
|
"eval_runtime": 81.1433, |
|
"eval_samples_per_second": 30.81, |
|
"eval_steps_per_second": 0.616, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_BLEU_ach": 14.4905, |
|
"eval_BLEU_lgg": 13.8605, |
|
"eval_BLEU_lug": 25.8326, |
|
"eval_BLEU_mean": 15.9588, |
|
"eval_BLEU_nyn": 11.7846, |
|
"eval_BLEU_teo": 13.8258, |
|
"eval_loss": 2.819976806640625, |
|
"eval_runtime": 82.5563, |
|
"eval_samples_per_second": 30.282, |
|
"eval_steps_per_second": 0.606, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_BLEU_ach": 15.9859, |
|
"eval_BLEU_lgg": 15.2204, |
|
"eval_BLEU_lug": 25.4128, |
|
"eval_BLEU_mean": 17.0546, |
|
"eval_BLEU_nyn": 13.1463, |
|
"eval_BLEU_teo": 15.5075, |
|
"eval_loss": 2.7662155628204346, |
|
"eval_runtime": 80.9944, |
|
"eval_samples_per_second": 30.866, |
|
"eval_steps_per_second": 0.617, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_BLEU_ach": 16.6088, |
|
"eval_BLEU_lgg": 15.9575, |
|
"eval_BLEU_lug": 25.2816, |
|
"eval_BLEU_mean": 17.3754, |
|
"eval_BLEU_nyn": 12.738, |
|
"eval_BLEU_teo": 16.2911, |
|
"eval_loss": 2.72863507270813, |
|
"eval_runtime": 80.9213, |
|
"eval_samples_per_second": 30.894, |
|
"eval_steps_per_second": 0.618, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_BLEU_ach": 17.0217, |
|
"eval_BLEU_lgg": 16.0095, |
|
"eval_BLEU_lug": 25.841, |
|
"eval_BLEU_mean": 17.8794, |
|
"eval_BLEU_nyn": 13.4439, |
|
"eval_BLEU_teo": 17.0808, |
|
"eval_loss": 2.703911781311035, |
|
"eval_runtime": 82.5765, |
|
"eval_samples_per_second": 30.275, |
|
"eval_steps_per_second": 0.605, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_BLEU_ach": 16.8919, |
|
"eval_BLEU_lgg": 17.5364, |
|
"eval_BLEU_lug": 25.8493, |
|
"eval_BLEU_mean": 18.3704, |
|
"eval_BLEU_nyn": 13.432, |
|
"eval_BLEU_teo": 18.1425, |
|
"eval_loss": 2.6822402477264404, |
|
"eval_runtime": 82.4142, |
|
"eval_samples_per_second": 30.335, |
|
"eval_steps_per_second": 0.607, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00043472584856396867, |
|
"loss": 2.7655, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_BLEU_ach": 16.8644, |
|
"eval_BLEU_lgg": 17.662, |
|
"eval_BLEU_lug": 26.7519, |
|
"eval_BLEU_mean": 18.8597, |
|
"eval_BLEU_nyn": 14.0338, |
|
"eval_BLEU_teo": 18.9866, |
|
"eval_loss": 2.6606709957122803, |
|
"eval_runtime": 82.2093, |
|
"eval_samples_per_second": 30.41, |
|
"eval_steps_per_second": 0.608, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_BLEU_ach": 17.7792, |
|
"eval_BLEU_lgg": 17.2092, |
|
"eval_BLEU_lug": 26.9857, |
|
"eval_BLEU_mean": 18.9292, |
|
"eval_BLEU_nyn": 13.7654, |
|
"eval_BLEU_teo": 18.9065, |
|
"eval_loss": 2.6529688835144043, |
|
"eval_runtime": 81.4731, |
|
"eval_samples_per_second": 30.685, |
|
"eval_steps_per_second": 0.614, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_BLEU_ach": 17.5146, |
|
"eval_BLEU_lgg": 17.6765, |
|
"eval_BLEU_lug": 26.2161, |
|
"eval_BLEU_mean": 18.7786, |
|
"eval_BLEU_nyn": 13.9067, |
|
"eval_BLEU_teo": 18.5792, |
|
"eval_loss": 2.640359401702881, |
|
"eval_runtime": 82.7573, |
|
"eval_samples_per_second": 30.209, |
|
"eval_steps_per_second": 0.604, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_BLEU_ach": 18.0286, |
|
"eval_BLEU_lgg": 17.2142, |
|
"eval_BLEU_lug": 27.1338, |
|
"eval_BLEU_mean": 19.1169, |
|
"eval_BLEU_nyn": 14.3399, |
|
"eval_BLEU_teo": 18.8677, |
|
"eval_loss": 2.6239874362945557, |
|
"eval_runtime": 83.0768, |
|
"eval_samples_per_second": 30.093, |
|
"eval_steps_per_second": 0.602, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_BLEU_ach": 18.2373, |
|
"eval_BLEU_lgg": 17.8534, |
|
"eval_BLEU_lug": 27.2152, |
|
"eval_BLEU_mean": 19.2291, |
|
"eval_BLEU_nyn": 14.9249, |
|
"eval_BLEU_teo": 17.9148, |
|
"eval_loss": 2.6153488159179688, |
|
"eval_runtime": 83.4372, |
|
"eval_samples_per_second": 29.963, |
|
"eval_steps_per_second": 0.599, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_BLEU_ach": 18.1915, |
|
"eval_BLEU_lgg": 18.7514, |
|
"eval_BLEU_lug": 27.1244, |
|
"eval_BLEU_mean": 19.5482, |
|
"eval_BLEU_nyn": 14.923, |
|
"eval_BLEU_teo": 18.7509, |
|
"eval_loss": 2.605884075164795, |
|
"eval_runtime": 83.7259, |
|
"eval_samples_per_second": 29.859, |
|
"eval_steps_per_second": 0.597, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_BLEU_ach": 18.1001, |
|
"eval_BLEU_lgg": 18.7958, |
|
"eval_BLEU_lug": 27.0829, |
|
"eval_BLEU_mean": 19.6395, |
|
"eval_BLEU_nyn": 14.6849, |
|
"eval_BLEU_teo": 19.534, |
|
"eval_loss": 2.5994479656219482, |
|
"eval_runtime": 81.9571, |
|
"eval_samples_per_second": 30.504, |
|
"eval_steps_per_second": 0.61, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_BLEU_ach": 18.4623, |
|
"eval_BLEU_lgg": 18.923, |
|
"eval_BLEU_lug": 27.8801, |
|
"eval_BLEU_mean": 20.1857, |
|
"eval_BLEU_nyn": 15.4508, |
|
"eval_BLEU_teo": 20.2125, |
|
"eval_loss": 2.6000475883483887, |
|
"eval_runtime": 82.634, |
|
"eval_samples_per_second": 30.254, |
|
"eval_steps_per_second": 0.605, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_BLEU_ach": 17.984, |
|
"eval_BLEU_lgg": 19.1317, |
|
"eval_BLEU_lug": 27.4247, |
|
"eval_BLEU_mean": 19.9146, |
|
"eval_BLEU_nyn": 15.4422, |
|
"eval_BLEU_teo": 19.5903, |
|
"eval_loss": 2.595747470855713, |
|
"eval_runtime": 82.2257, |
|
"eval_samples_per_second": 30.404, |
|
"eval_steps_per_second": 0.608, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_BLEU_ach": 18.2931, |
|
"eval_BLEU_lgg": 19.5551, |
|
"eval_BLEU_lug": 26.6605, |
|
"eval_BLEU_mean": 19.8366, |
|
"eval_BLEU_nyn": 14.4395, |
|
"eval_BLEU_teo": 20.2348, |
|
"eval_loss": 2.5794920921325684, |
|
"eval_runtime": 83.6122, |
|
"eval_samples_per_second": 29.9, |
|
"eval_steps_per_second": 0.598, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.0003694516971279374, |
|
"loss": 2.2915, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_BLEU_ach": 19.06, |
|
"eval_BLEU_lgg": 19.0945, |
|
"eval_BLEU_lug": 27.6451, |
|
"eval_BLEU_mean": 20.1345, |
|
"eval_BLEU_nyn": 15.1057, |
|
"eval_BLEU_teo": 19.7672, |
|
"eval_loss": 2.580702781677246, |
|
"eval_runtime": 84.0777, |
|
"eval_samples_per_second": 29.734, |
|
"eval_steps_per_second": 0.595, |
|
"step": 1000 |
|
} |
|
], |
|
"max_steps": 3830, |
|
"num_train_epochs": 10, |
|
"total_flos": 7.668611525246976e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|