{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.7899561578318055, "global_step": 7000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 1e-05, "loss": 0.0687, "step": 200 }, { "epoch": 0.16, "learning_rate": 2e-05, "loss": 0.0565, "step": 400 }, { "epoch": 0.24, "learning_rate": 3e-05, "loss": 0.0463, "step": 600 }, { "epoch": 0.32, "learning_rate": 4e-05, "loss": 0.0388, "step": 800 }, { "epoch": 0.4, "learning_rate": 5e-05, "loss": 0.0276, "step": 1000 }, { "epoch": 0.4, "eval_bleu": 44.72601443188115, "eval_chrf": 21.151350421377852, "eval_loss": 0.2960108518600464, "eval_runtime": 393.5134, "eval_samples_per_second": 4.066, "eval_steps_per_second": 0.127, "step": 1000 }, { "epoch": 0.48, "learning_rate": 4.846790255860273e-05, "loss": 0.025, "step": 1200 }, { "epoch": 0.56, "learning_rate": 4.693580511720546e-05, "loss": 0.0239, "step": 1400 }, { "epoch": 0.64, "learning_rate": 4.5403707675808184e-05, "loss": 0.0235, "step": 1600 }, { "epoch": 0.72, "learning_rate": 4.387161023441091e-05, "loss": 0.0252, "step": 1800 }, { "epoch": 0.8, "learning_rate": 4.2339512793013634e-05, "loss": 0.0356, "step": 2000 }, { "epoch": 0.8, "eval_bleu": 47.56531492362014, "eval_chrf": 23.16773749233209, "eval_loss": 0.2850428521633148, "eval_runtime": 440.9934, "eval_samples_per_second": 3.628, "eval_steps_per_second": 0.113, "step": 2000 }, { "epoch": 0.88, "learning_rate": 4.0807415351616366e-05, "loss": 0.0682, "step": 2200 }, { "epoch": 0.96, "learning_rate": 3.927531791021909e-05, "loss": 0.0659, "step": 2400 }, { "epoch": 1.04, "learning_rate": 3.774322046882182e-05, "loss": 0.0589, "step": 2600 }, { "epoch": 1.12, "learning_rate": 3.621112302742455e-05, "loss": 0.0532, "step": 2800 }, { "epoch": 1.2, "learning_rate": 3.467902558602727e-05, "loss": 0.0533, "step": 3000 }, { "epoch": 1.2, "eval_bleu": 49.764405697578844, "eval_chrf": 24.692162908504518, "eval_loss": 0.26993224024772644, "eval_runtime": 462.0002, "eval_samples_per_second": 3.463, "eval_steps_per_second": 0.108, "step": 3000 }, { "epoch": 1.28, "learning_rate": 3.3146928144630004e-05, "loss": 0.0527, "step": 3200 }, { "epoch": 1.36, "learning_rate": 3.161483070323273e-05, "loss": 0.0497, "step": 3400 }, { "epoch": 1.43, "learning_rate": 3.0082733261835454e-05, "loss": 0.0492, "step": 3600 }, { "epoch": 1.51, "learning_rate": 2.8550635820438183e-05, "loss": 0.0504, "step": 3800 }, { "epoch": 1.59, "learning_rate": 2.7018538379040908e-05, "loss": 0.0489, "step": 4000 }, { "epoch": 1.59, "eval_bleu": 50.29334272679934, "eval_chrf": 25.16305325870508, "eval_loss": 0.26400861144065857, "eval_runtime": 481.6508, "eval_samples_per_second": 3.322, "eval_steps_per_second": 0.104, "step": 4000 }, { "epoch": 1.67, "learning_rate": 2.5486440937643636e-05, "loss": 0.0474, "step": 4200 }, { "epoch": 1.75, "learning_rate": 2.395434349624636e-05, "loss": 0.048, "step": 4400 }, { "epoch": 1.83, "learning_rate": 2.242224605484909e-05, "loss": 0.0484, "step": 4600 }, { "epoch": 1.91, "learning_rate": 2.0890148613451814e-05, "loss": 0.0471, "step": 4800 }, { "epoch": 1.99, "learning_rate": 1.9358051172054546e-05, "loss": 0.0479, "step": 5000 }, { "epoch": 1.99, "eval_bleu": 50.91689121775582, "eval_chrf": 25.72330191241805, "eval_loss": 0.2619972229003906, "eval_runtime": 498.0001, "eval_samples_per_second": 3.213, "eval_steps_per_second": 0.1, "step": 5000 }, { "epoch": 2.07, "learning_rate": 1.782595373065727e-05, "loss": 0.0425, "step": 5200 }, { "epoch": 2.15, "learning_rate": 1.629385628926e-05, "loss": 0.0418, "step": 5400 }, { "epoch": 2.23, "learning_rate": 1.4761758847862724e-05, "loss": 0.0408, "step": 5600 }, { "epoch": 2.31, "learning_rate": 1.3229661406465451e-05, "loss": 0.0418, "step": 5800 }, { "epoch": 2.39, "learning_rate": 1.1697563965068178e-05, "loss": 0.0405, "step": 6000 }, { "epoch": 2.39, "eval_bleu": 51.58204130219731, "eval_chrf": 26.294054966415302, "eval_loss": 0.2597905993461609, "eval_runtime": 493.3118, "eval_samples_per_second": 3.243, "eval_steps_per_second": 0.101, "step": 6000 }, { "epoch": 2.47, "learning_rate": 1.0165466523670906e-05, "loss": 0.0408, "step": 6200 }, { "epoch": 2.55, "learning_rate": 8.633369082273633e-06, "loss": 0.0415, "step": 6400 }, { "epoch": 2.63, "learning_rate": 7.10127164087636e-06, "loss": 0.0409, "step": 6600 }, { "epoch": 2.71, "learning_rate": 5.569174199479087e-06, "loss": 0.0407, "step": 6800 }, { "epoch": 2.79, "learning_rate": 4.037076758081814e-06, "loss": 0.0404, "step": 7000 }, { "epoch": 2.79, "eval_bleu": 51.60441673148478, "eval_chrf": 26.355725929893868, "eval_loss": 0.26132842898368835, "eval_runtime": 515.0605, "eval_samples_per_second": 3.106, "eval_steps_per_second": 0.097, "step": 7000 } ], "max_steps": 7527, "num_train_epochs": 3, "total_flos": 1.333738957824e+16, "trial_name": null, "trial_params": null }