{ "best_metric": 2.5794920921325684, "best_model_checkpoint": "/kaggle/working/best/checkpoint-950", "epoch": 2.6095654892153797, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "eval_BLEU_ach": 4.4065, "eval_BLEU_lgg": 1.7478, "eval_BLEU_lug": 19.3906, "eval_BLEU_mean": 6.3725, "eval_BLEU_nyn": 5.6412, "eval_BLEU_teo": 0.6765, "eval_loss": 3.7197346687316895, "eval_runtime": 81.4138, "eval_samples_per_second": 30.707, "eval_steps_per_second": 0.614, "step": 50 }, { "epoch": 0.26, "eval_BLEU_ach": 10.4487, "eval_BLEU_lgg": 6.9863, "eval_BLEU_lug": 22.1636, "eval_BLEU_mean": 11.2797, "eval_BLEU_nyn": 9.4768, "eval_BLEU_teo": 7.3231, "eval_loss": 3.190028429031372, "eval_runtime": 80.5044, "eval_samples_per_second": 31.054, "eval_steps_per_second": 0.621, "step": 100 }, { "epoch": 0.39, "eval_BLEU_ach": 12.7721, "eval_BLEU_lgg": 10.1002, "eval_BLEU_lug": 23.7283, "eval_BLEU_mean": 13.7176, "eval_BLEU_nyn": 11.3816, "eval_BLEU_teo": 10.606, "eval_loss": 2.9923548698425293, "eval_runtime": 80.2426, "eval_samples_per_second": 31.156, "eval_steps_per_second": 0.623, "step": 150 }, { "epoch": 0.52, "eval_BLEU_ach": 14.146, "eval_BLEU_lgg": 13.022, "eval_BLEU_lug": 24.6207, "eval_BLEU_mean": 15.4215, "eval_BLEU_nyn": 11.8514, "eval_BLEU_teo": 13.4677, "eval_loss": 2.884697437286377, "eval_runtime": 81.1433, "eval_samples_per_second": 30.81, "eval_steps_per_second": 0.616, "step": 200 }, { "epoch": 0.65, "eval_BLEU_ach": 14.4905, "eval_BLEU_lgg": 13.8605, "eval_BLEU_lug": 25.8326, "eval_BLEU_mean": 15.9588, "eval_BLEU_nyn": 11.7846, "eval_BLEU_teo": 13.8258, "eval_loss": 2.819976806640625, "eval_runtime": 82.5563, "eval_samples_per_second": 30.282, "eval_steps_per_second": 0.606, "step": 250 }, { "epoch": 0.78, "eval_BLEU_ach": 15.9859, "eval_BLEU_lgg": 15.2204, "eval_BLEU_lug": 25.4128, "eval_BLEU_mean": 17.0546, "eval_BLEU_nyn": 13.1463, "eval_BLEU_teo": 15.5075, "eval_loss": 2.7662155628204346, "eval_runtime": 80.9944, "eval_samples_per_second": 30.866, "eval_steps_per_second": 0.617, "step": 300 }, { "epoch": 0.91, "eval_BLEU_ach": 16.6088, "eval_BLEU_lgg": 15.9575, "eval_BLEU_lug": 25.2816, "eval_BLEU_mean": 17.3754, "eval_BLEU_nyn": 12.738, "eval_BLEU_teo": 16.2911, "eval_loss": 2.72863507270813, "eval_runtime": 80.9213, "eval_samples_per_second": 30.894, "eval_steps_per_second": 0.618, "step": 350 }, { "epoch": 1.04, "eval_BLEU_ach": 17.0217, "eval_BLEU_lgg": 16.0095, "eval_BLEU_lug": 25.841, "eval_BLEU_mean": 17.8794, "eval_BLEU_nyn": 13.4439, "eval_BLEU_teo": 17.0808, "eval_loss": 2.703911781311035, "eval_runtime": 82.5765, "eval_samples_per_second": 30.275, "eval_steps_per_second": 0.605, "step": 400 }, { "epoch": 1.17, "eval_BLEU_ach": 16.8919, "eval_BLEU_lgg": 17.5364, "eval_BLEU_lug": 25.8493, "eval_BLEU_mean": 18.3704, "eval_BLEU_nyn": 13.432, "eval_BLEU_teo": 18.1425, "eval_loss": 2.6822402477264404, "eval_runtime": 82.4142, "eval_samples_per_second": 30.335, "eval_steps_per_second": 0.607, "step": 450 }, { "epoch": 1.3, "learning_rate": 0.00043472584856396867, "loss": 2.7655, "step": 500 }, { "epoch": 1.3, "eval_BLEU_ach": 16.8644, "eval_BLEU_lgg": 17.662, "eval_BLEU_lug": 26.7519, "eval_BLEU_mean": 18.8597, "eval_BLEU_nyn": 14.0338, "eval_BLEU_teo": 18.9866, "eval_loss": 2.6606709957122803, "eval_runtime": 82.2093, "eval_samples_per_second": 30.41, "eval_steps_per_second": 0.608, "step": 500 }, { "epoch": 1.44, "eval_BLEU_ach": 17.7792, "eval_BLEU_lgg": 17.2092, "eval_BLEU_lug": 26.9857, "eval_BLEU_mean": 18.9292, "eval_BLEU_nyn": 13.7654, "eval_BLEU_teo": 18.9065, "eval_loss": 2.6529688835144043, "eval_runtime": 81.4731, "eval_samples_per_second": 30.685, "eval_steps_per_second": 0.614, "step": 550 }, { "epoch": 1.57, "eval_BLEU_ach": 17.5146, "eval_BLEU_lgg": 17.6765, "eval_BLEU_lug": 26.2161, "eval_BLEU_mean": 18.7786, "eval_BLEU_nyn": 13.9067, "eval_BLEU_teo": 18.5792, "eval_loss": 2.640359401702881, "eval_runtime": 82.7573, "eval_samples_per_second": 30.209, "eval_steps_per_second": 0.604, "step": 600 }, { "epoch": 1.7, "eval_BLEU_ach": 18.0286, "eval_BLEU_lgg": 17.2142, "eval_BLEU_lug": 27.1338, "eval_BLEU_mean": 19.1169, "eval_BLEU_nyn": 14.3399, "eval_BLEU_teo": 18.8677, "eval_loss": 2.6239874362945557, "eval_runtime": 83.0768, "eval_samples_per_second": 30.093, "eval_steps_per_second": 0.602, "step": 650 }, { "epoch": 1.83, "eval_BLEU_ach": 18.2373, "eval_BLEU_lgg": 17.8534, "eval_BLEU_lug": 27.2152, "eval_BLEU_mean": 19.2291, "eval_BLEU_nyn": 14.9249, "eval_BLEU_teo": 17.9148, "eval_loss": 2.6153488159179688, "eval_runtime": 83.4372, "eval_samples_per_second": 29.963, "eval_steps_per_second": 0.599, "step": 700 }, { "epoch": 1.96, "eval_BLEU_ach": 18.1915, "eval_BLEU_lgg": 18.7514, "eval_BLEU_lug": 27.1244, "eval_BLEU_mean": 19.5482, "eval_BLEU_nyn": 14.923, "eval_BLEU_teo": 18.7509, "eval_loss": 2.605884075164795, "eval_runtime": 83.7259, "eval_samples_per_second": 29.859, "eval_steps_per_second": 0.597, "step": 750 }, { "epoch": 2.09, "eval_BLEU_ach": 18.1001, "eval_BLEU_lgg": 18.7958, "eval_BLEU_lug": 27.0829, "eval_BLEU_mean": 19.6395, "eval_BLEU_nyn": 14.6849, "eval_BLEU_teo": 19.534, "eval_loss": 2.5994479656219482, "eval_runtime": 81.9571, "eval_samples_per_second": 30.504, "eval_steps_per_second": 0.61, "step": 800 }, { "epoch": 2.22, "eval_BLEU_ach": 18.4623, "eval_BLEU_lgg": 18.923, "eval_BLEU_lug": 27.8801, "eval_BLEU_mean": 20.1857, "eval_BLEU_nyn": 15.4508, "eval_BLEU_teo": 20.2125, "eval_loss": 2.6000475883483887, "eval_runtime": 82.634, "eval_samples_per_second": 30.254, "eval_steps_per_second": 0.605, "step": 850 }, { "epoch": 2.35, "eval_BLEU_ach": 17.984, "eval_BLEU_lgg": 19.1317, "eval_BLEU_lug": 27.4247, "eval_BLEU_mean": 19.9146, "eval_BLEU_nyn": 15.4422, "eval_BLEU_teo": 19.5903, "eval_loss": 2.595747470855713, "eval_runtime": 82.2257, "eval_samples_per_second": 30.404, "eval_steps_per_second": 0.608, "step": 900 }, { "epoch": 2.48, "eval_BLEU_ach": 18.2931, "eval_BLEU_lgg": 19.5551, "eval_BLEU_lug": 26.6605, "eval_BLEU_mean": 19.8366, "eval_BLEU_nyn": 14.4395, "eval_BLEU_teo": 20.2348, "eval_loss": 2.5794920921325684, "eval_runtime": 83.6122, "eval_samples_per_second": 29.9, "eval_steps_per_second": 0.598, "step": 950 }, { "epoch": 2.61, "learning_rate": 0.0003694516971279374, "loss": 2.2915, "step": 1000 }, { "epoch": 2.61, "eval_BLEU_ach": 19.06, "eval_BLEU_lgg": 19.0945, "eval_BLEU_lug": 27.6451, "eval_BLEU_mean": 20.1345, "eval_BLEU_nyn": 15.1057, "eval_BLEU_teo": 19.7672, "eval_loss": 2.580702781677246, "eval_runtime": 84.0777, "eval_samples_per_second": 29.734, "eval_steps_per_second": 0.595, "step": 1000 } ], "max_steps": 3830, "num_train_epochs": 10, "total_flos": 7.668611525246976e+16, "trial_name": null, "trial_params": null }