{ "best_metric": 0.06889855116605759, "best_model_checkpoint": "./models/1/checkpoint-9737", "epoch": 10.0, "global_step": 13910, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "learning_rate": 3.591954022988506e-05, "loss": 0.0876, "step": 500 }, { "epoch": 0.72, "learning_rate": 4.884970485848343e-05, "loss": 0.0879, "step": 1000 }, { "epoch": 1.0, "eval_BLEU": 8.064725756490624, "eval_BLEU-Bigram-Precision": 10.432874977383753, "eval_BLEU-Trigram-Precision": 5.407291933269737, "eval_BLEU-Unigram-Precision": 21.631135458605595, "eval_ROUGE-2": 12.39926519031995, "eval_ROUGE-L": 24.731962013373813, "eval_Sacre-Bigram-Precision": 8.552870639534884, "eval_Sacre-Trigram-Precision": 4.851033999298983, "eval_Sacre-Unigram-Precision": 19.295765314011796, "eval_SacreBLEU": 7.1292786955103695, "eval_loss": 0.0844602882862091, "eval_runtime": 523.4808, "eval_samples_per_second": 2.363, "eval_steps_per_second": 0.296, "step": 1391 }, { "epoch": 1.08, "learning_rate": 4.6961555925533526e-05, "loss": 0.0829, "step": 1500 }, { "epoch": 1.44, "learning_rate": 4.506962312698653e-05, "loss": 0.0747, "step": 2000 }, { "epoch": 1.8, "learning_rate": 4.317769032843954e-05, "loss": 0.0738, "step": 2500 }, { "epoch": 2.0, "eval_BLEU": 7.726645595861129, "eval_BLEU-Bigram-Precision": 9.779983147645352, "eval_BLEU-Trigram-Precision": 5.376859377396105, "eval_BLEU-Unigram-Precision": 19.00918707221551, "eval_ROUGE-2": 14.567403936228773, "eval_ROUGE-L": 26.962145889007623, "eval_Sacre-Bigram-Precision": 8.340425531914894, "eval_Sacre-Trigram-Precision": 5.06410008133545, "eval_Sacre-Unigram-Precision": 17.33626552335896, "eval_SacreBLEU": 7.08253441311445, "eval_loss": 0.07733357697725296, "eval_runtime": 594.316, "eval_samples_per_second": 2.081, "eval_steps_per_second": 0.261, "step": 2782 }, { "epoch": 2.16, "learning_rate": 4.1285757529892535e-05, "loss": 0.0669, "step": 3000 }, { "epoch": 2.52, "learning_rate": 3.939382473134555e-05, "loss": 0.0631, "step": 3500 }, { "epoch": 2.88, "learning_rate": 3.7505675798395645e-05, "loss": 0.0616, "step": 4000 }, { "epoch": 3.0, "eval_BLEU": 13.75944614514078, "eval_BLEU-Bigram-Precision": 17.05563909774436, "eval_BLEU-Trigram-Precision": 9.895979758223222, "eval_BLEU-Unigram-Precision": 31.226259170122074, "eval_ROUGE-2": 16.762154637582928, "eval_ROUGE-L": 29.733231785857146, "eval_Sacre-Bigram-Precision": 14.784847370356749, "eval_Sacre-Trigram-Precision": 9.445382434455736, "eval_Sacre-Unigram-Precision": 28.888232688616565, "eval_SacreBLEU": 12.80566461172721, "eval_loss": 0.07288952171802521, "eval_runtime": 656.3315, "eval_samples_per_second": 1.885, "eval_steps_per_second": 0.236, "step": 4173 }, { "epoch": 3.24, "learning_rate": 3.561374299984865e-05, "loss": 0.0556, "step": 4500 }, { "epoch": 3.59, "learning_rate": 3.372181020130165e-05, "loss": 0.0522, "step": 5000 }, { "epoch": 3.95, "learning_rate": 3.1829877402754655e-05, "loss": 0.0541, "step": 5500 }, { "epoch": 4.0, "eval_BLEU": 12.584990506585802, "eval_BLEU-Bigram-Precision": 15.284490474363894, "eval_BLEU-Trigram-Precision": 9.189817259955635, "eval_BLEU-Unigram-Precision": 27.802290416935204, "eval_ROUGE-2": 18.2427503756888, "eval_ROUGE-L": 31.248445460832464, "eval_Sacre-Bigram-Precision": 13.375350140056023, "eval_Sacre-Trigram-Precision": 8.759613065730592, "eval_Sacre-Unigram-Precision": 25.743221169552434, "eval_SacreBLEU": 11.731554660360798, "eval_loss": 0.07118678092956543, "eval_runtime": 463.4231, 
"eval_samples_per_second": 2.669, "eval_steps_per_second": 0.334, "step": 5564 }, { "epoch": 4.31, "learning_rate": 2.9937944604207657e-05, "loss": 0.0464, "step": 6000 }, { "epoch": 4.67, "learning_rate": 2.8046011805660665e-05, "loss": 0.0459, "step": 6500 }, { "epoch": 5.0, "eval_BLEU": 15.1776190124192, "eval_BLEU-Bigram-Precision": 18.32776890695311, "eval_BLEU-Trigram-Precision": 11.247289809684414, "eval_BLEU-Unigram-Precision": 32.42811501597444, "eval_ROUGE-2": 17.936785005872903, "eval_ROUGE-L": 31.307371141735135, "eval_Sacre-Bigram-Precision": 16.26334519572954, "eval_Sacre-Trigram-Precision": 10.753317119724164, "eval_Sacre-Unigram-Precision": 30.265755070515205, "eval_SacreBLEU": 14.242434580808037, "eval_loss": 0.06986960768699646, "eval_runtime": 398.2357, "eval_samples_per_second": 3.106, "eval_steps_per_second": 0.389, "step": 6955 }, { "epoch": 5.03, "learning_rate": 2.6154079007113667e-05, "loss": 0.0455, "step": 7000 }, { "epoch": 5.39, "learning_rate": 2.4262146208566673e-05, "loss": 0.0393, "step": 7500 }, { "epoch": 5.75, "learning_rate": 2.2370213410019678e-05, "loss": 0.0405, "step": 8000 }, { "epoch": 6.0, "eval_BLEU": 17.03804435280673, "eval_BLEU-Bigram-Precision": 20.16791440608614, "eval_BLEU-Trigram-Precision": 12.76924045849603, "eval_BLEU-Unigram-Precision": 34.79696172947707, "eval_ROUGE-2": 20.00404092356461, "eval_ROUGE-L": 33.44410880996549, "eval_Sacre-Bigram-Precision": 18.140892297184017, "eval_Sacre-Trigram-Precision": 12.388609066253391, "eval_Sacre-Unigram-Precision": 32.66758356754171, "eval_SacreBLEU": 16.183245622226913, "eval_loss": 0.06919773668050766, "eval_runtime": 389.7744, "eval_samples_per_second": 3.174, "eval_steps_per_second": 0.398, "step": 8346 }, { "epoch": 6.11, "learning_rate": 2.047828061147268e-05, "loss": 0.0395, "step": 8500 }, { "epoch": 6.47, "learning_rate": 1.8586347812925686e-05, "loss": 0.0351, "step": 9000 }, { "epoch": 6.83, "learning_rate": 1.669441501437869e-05, "loss": 0.0356, "step": 9500 }, { "epoch": 7.0, "eval_BLEU": 18.45635959837559, "eval_BLEU-Bigram-Precision": 21.599746072052056, "eval_BLEU-Trigram-Precision": 13.98176291793313, "eval_BLEU-Unigram-Precision": 36.946429662207564, "eval_ROUGE-2": 20.663766964343406, "eval_ROUGE-L": 33.85196384000605, "eval_Sacre-Bigram-Precision": 19.43390720747009, "eval_Sacre-Trigram-Precision": 13.581706410862664, "eval_Sacre-Unigram-Precision": 34.678927680798004, "eval_SacreBLEU": 17.534804534318486, "eval_loss": 0.06889855116605759, "eval_runtime": 289.9351, "eval_samples_per_second": 4.266, "eval_steps_per_second": 0.535, "step": 9737 }, { "epoch": 7.19, "learning_rate": 1.4802482215831695e-05, "loss": 0.0324, "step": 10000 }, { "epoch": 7.55, "learning_rate": 1.2910549417284698e-05, "loss": 0.032, "step": 10500 }, { "epoch": 7.91, "learning_rate": 1.1018616618737702e-05, "loss": 0.0316, "step": 11000 }, { "epoch": 8.0, "eval_BLEU": 23.036610051092996, "eval_BLEU-Bigram-Precision": 26.881516587677723, "eval_BLEU-Trigram-Precision": 17.706261436868488, "eval_BLEU-Unigram-Precision": 44.332174416920175, "eval_ROUGE-2": 21.29088469404566, "eval_ROUGE-L": 35.036834725440904, "eval_Sacre-Bigram-Precision": 24.360966513868387, "eval_Sacre-Trigram-Precision": 17.249540910018364, "eval_Sacre-Unigram-Precision": 41.78435079135624, "eval_SacreBLEU": 21.988824762940034, "eval_loss": 0.0689924880862236, "eval_runtime": 259.2001, "eval_samples_per_second": 4.772, "eval_steps_per_second": 0.598, "step": 11128 }, { "epoch": 8.27, "learning_rate": 9.126683820190707e-06, "loss": 
0.0295, "step": 11500 }, { "epoch": 8.63, "learning_rate": 7.238534887240806e-06, "loss": 0.0283, "step": 12000 }, { "epoch": 8.99, "learning_rate": 5.3466020886938095e-06, "loss": 0.0287, "step": 12500 }, { "epoch": 9.0, "eval_BLEU": 26.995788994612766, "eval_BLEU-Bigram-Precision": 30.950083340643914, "eval_BLEU-Trigram-Precision": 20.98232920550995, "eval_BLEU-Unigram-Precision": 50.522155190347405, "eval_ROUGE-2": 22.105956241069418, "eval_ROUGE-L": 35.85099346344104, "eval_Sacre-Bigram-Precision": 28.175123119324088, "eval_Sacre-Trigram-Precision": 20.573315467075037, "eval_Sacre-Unigram-Precision": 47.89045785194694, "eval_SacreBLEU": 25.924953054404842, "eval_loss": 0.06922509521245956, "eval_runtime": 198.6398, "eval_samples_per_second": 6.227, "eval_steps_per_second": 0.78, "step": 12519 }, { "epoch": 9.35, "learning_rate": 3.454669290146814e-06, "loss": 0.0271, "step": 13000 }, { "epoch": 9.71, "learning_rate": 1.5627364915998186e-06, "loss": 0.0265, "step": 13500 }, { "epoch": 10.0, "eval_BLEU": 26.712396231914138, "eval_BLEU-Bigram-Precision": 30.61851211072664, "eval_BLEU-Trigram-Precision": 20.719279806242287, "eval_BLEU-Unigram-Precision": 49.895307303855155, "eval_ROUGE-2": 22.458649124626863, "eval_ROUGE-L": 36.256812107261126, "eval_Sacre-Bigram-Precision": 27.962698554345884, "eval_Sacre-Trigram-Precision": 20.387249114521843, "eval_Sacre-Unigram-Precision": 47.333925324538036, "eval_SacreBLEU": 25.70732578879903, "eval_loss": 0.06933248043060303, "eval_runtime": 190.4819, "eval_samples_per_second": 6.494, "eval_steps_per_second": 0.814, "step": 13910 } ], "max_steps": 13910, "num_train_epochs": 10, "total_flos": 3.016954478592e+16, "trial_name": null, "trial_params": null }