{ "best_metric": 2.6812784671783447, "best_model_checkpoint": "./models/final_bart/checkpoint-2000", "epoch": 5.0, "global_step": 3340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 8.982035928143711e-06, "loss": 5.6146, "step": 100 }, { "epoch": 0.3, "learning_rate": 1.7964071856287423e-05, "loss": 2.8024, "step": 200 }, { "epoch": 0.45, "learning_rate": 2.6946107784431136e-05, "loss": 2.5446, "step": 300 }, { "epoch": 0.6, "learning_rate": 2.934131736526946e-05, "loss": 2.4198, "step": 400 }, { "epoch": 0.75, "learning_rate": 2.8343313373253494e-05, "loss": 2.3636, "step": 500 }, { "epoch": 0.9, "learning_rate": 2.7345309381237524e-05, "loss": 2.3251, "step": 600 }, { "epoch": 1.05, "learning_rate": 2.6347305389221558e-05, "loss": 2.2522, "step": 700 }, { "epoch": 1.2, "learning_rate": 2.534930139720559e-05, "loss": 2.1959, "step": 800 }, { "epoch": 1.35, "learning_rate": 2.4351297405189622e-05, "loss": 2.1588, "step": 900 }, { "epoch": 1.5, "learning_rate": 2.3353293413173656e-05, "loss": 2.1542, "step": 1000 }, { "epoch": 1.5, "eval_bleu1": 27.9938, "eval_bleu2": 15.5354, "eval_bleu3": 8.2494, "eval_bleu4": 4.42, "eval_gen_len": 50.08, "eval_loss": 2.749131202697754, "eval_rdass": 0.6093000173568726, "eval_rouge1": 33.5554, "eval_rouge2": 11.2371, "eval_rougeL": 22.006, "eval_runtime": 24.7242, "eval_samples_per_second": 4.045, "eval_steps_per_second": 0.04, "step": 1000 }, { "epoch": 1.65, "learning_rate": 2.2355289421157686e-05, "loss": 2.1473, "step": 1100 }, { "epoch": 1.8, "learning_rate": 2.135728542914172e-05, "loss": 2.1459, "step": 1200 }, { "epoch": 1.95, "learning_rate": 2.0359281437125747e-05, "loss": 2.1324, "step": 1300 }, { "epoch": 2.1, "learning_rate": 1.936127744510978e-05, "loss": 2.0676, "step": 1400 }, { "epoch": 2.25, "learning_rate": 1.836327345309381e-05, "loss": 2.0206, "step": 1500 }, { "epoch": 2.4, "learning_rate": 1.7365269461077845e-05, "loss": 2.0198, "step": 1600 }, { "epoch": 2.54, "learning_rate": 1.6367265469061875e-05, "loss": 2.0177, "step": 1700 }, { "epoch": 2.69, "learning_rate": 1.536926147704591e-05, "loss": 2.0143, "step": 1800 }, { "epoch": 2.84, "learning_rate": 1.437125748502994e-05, "loss": 2.0075, "step": 1900 }, { "epoch": 2.99, "learning_rate": 1.3373253493013973e-05, "loss": 2.0071, "step": 2000 }, { "epoch": 2.99, "eval_bleu1": 29.6866, "eval_bleu2": 17.1396, "eval_bleu3": 9.7016, "eval_bleu4": 5.3559, "eval_gen_len": 54.04, "eval_loss": 2.6812784671783447, "eval_rdass": 0.6154999732971191, "eval_rouge1": 35.0501, "eval_rouge2": 12.2759, "eval_rougeL": 22.6669, "eval_runtime": 20.0572, "eval_samples_per_second": 4.986, "eval_steps_per_second": 0.05, "step": 2000 }, { "epoch": 3.14, "learning_rate": 1.2375249500998005e-05, "loss": 1.9318, "step": 2100 }, { "epoch": 3.29, "learning_rate": 1.1377245508982035e-05, "loss": 1.9389, "step": 2200 }, { "epoch": 3.44, "learning_rate": 1.0379241516966067e-05, "loss": 1.9214, "step": 2300 }, { "epoch": 3.59, "learning_rate": 9.3812375249501e-06, "loss": 1.9144, "step": 2400 }, { "epoch": 3.74, "learning_rate": 8.383233532934131e-06, "loss": 1.9404, "step": 2500 }, { "epoch": 3.89, "learning_rate": 7.385229540918164e-06, "loss": 1.9189, "step": 2600 }, { "epoch": 4.04, "learning_rate": 6.3872255489021955e-06, "loss": 1.9155, "step": 2700 }, { "epoch": 4.19, "learning_rate": 5.3892215568862275e-06, "loss": 1.874, "step": 2800 }, { "epoch": 4.34, "learning_rate": 4.39121756487026e-06, "loss": 1.8951, "step": 2900 }, { "epoch": 4.49, "learning_rate": 3.3932135728542917e-06, "loss": 1.8694, "step": 3000 }, { "epoch": 4.49, "eval_bleu1": 30.5261, "eval_bleu2": 17.6264, "eval_bleu3": 10.3974, "eval_bleu4": 5.4348, "eval_gen_len": 53.47, "eval_loss": 2.684814453125, "eval_rdass": 0.6248000264167786, "eval_rouge1": 35.7722, "eval_rouge2": 12.5127, "eval_rougeL": 23.3002, "eval_runtime": 17.6216, "eval_samples_per_second": 5.675, "eval_steps_per_second": 0.057, "step": 3000 }, { "epoch": 4.64, "learning_rate": 2.3952095808383233e-06, "loss": 1.8708, "step": 3100 }, { "epoch": 4.79, "learning_rate": 1.3972055888223554e-06, "loss": 1.8628, "step": 3200 }, { "epoch": 4.94, "learning_rate": 3.992015968063872e-07, "loss": 1.8613, "step": 3300 }, { "epoch": 5.0, "step": 3340, "total_flos": 5.5473275160576e+16, "train_loss": 2.181800748630912, "train_runtime": 3900.3613, "train_samples_per_second": 54.733, "train_steps_per_second": 0.856 } ], "max_steps": 3340, "num_train_epochs": 5, "total_flos": 5.5473275160576e+16, "trial_name": null, "trial_params": null }