{ "best_metric": 1.1659743785858154, "best_model_checkpoint": "/tmp/model/checkpoint-3000", "epoch": 3.0, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 8.333333333333334e-06, "loss": 3.4301, "step": 50 }, { "epoch": 0.1, "learning_rate": 1.6666666666666667e-05, "loss": 2.1638, "step": 100 }, { "epoch": 0.15, "learning_rate": 2.5e-05, "loss": 1.8374, "step": 150 }, { "epoch": 0.2, "learning_rate": 3.3333333333333335e-05, "loss": 1.7298, "step": 200 }, { "epoch": 0.25, "learning_rate": 4.166666666666667e-05, "loss": 1.6427, "step": 250 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 1.5828, "step": 300 }, { "epoch": 0.35, "learning_rate": 4.9074074074074075e-05, "loss": 1.5323, "step": 350 }, { "epoch": 0.4, "learning_rate": 4.814814814814815e-05, "loss": 1.5066, "step": 400 }, { "epoch": 0.45, "learning_rate": 4.722222222222222e-05, "loss": 1.4769, "step": 450 }, { "epoch": 0.5, "learning_rate": 4.62962962962963e-05, "loss": 1.45, "step": 500 }, { "epoch": 0.55, "learning_rate": 4.5370370370370374e-05, "loss": 1.4125, "step": 550 }, { "epoch": 0.6, "learning_rate": 4.4444444444444447e-05, "loss": 1.3836, "step": 600 }, { "epoch": 0.65, "learning_rate": 4.351851851851852e-05, "loss": 1.3856, "step": 650 }, { "epoch": 0.7, "learning_rate": 4.259259259259259e-05, "loss": 1.3565, "step": 700 }, { "epoch": 0.75, "learning_rate": 4.166666666666667e-05, "loss": 1.3546, "step": 750 }, { "epoch": 0.8, "learning_rate": 4.074074074074074e-05, "loss": 1.3101, "step": 800 }, { "epoch": 0.85, "learning_rate": 3.981481481481482e-05, "loss": 1.3605, "step": 850 }, { "epoch": 0.9, "learning_rate": 3.888888888888889e-05, "loss": 1.3175, "step": 900 }, { "epoch": 0.95, "learning_rate": 3.7962962962962964e-05, "loss": 1.3235, "step": 950 }, { "epoch": 1.0, "learning_rate": 3.7037037037037037e-05, "loss": 1.3046, "step": 1000 }, { "epoch": 1.0, "eval_gen_len": 20.0, "eval_loss": 1.2609087228775024, "eval_rouge1": 13.4574, "eval_rouge2": 9.198, "eval_rougeL": 13.0881, "eval_rougeLsum": 13.3368, "eval_runtime": 191.5141, "eval_samples_per_second": 10.443, "eval_steps_per_second": 0.653, "step": 1000 }, { "epoch": 1.05, "learning_rate": 3.611111111111111e-05, "loss": 1.2635, "step": 1050 }, { "epoch": 1.1, "learning_rate": 3.518518518518519e-05, "loss": 1.2522, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.425925925925926e-05, "loss": 1.2465, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.3333333333333335e-05, "loss": 1.2389, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.240740740740741e-05, "loss": 1.2321, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.148148148148148e-05, "loss": 1.2349, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.055555555555556e-05, "loss": 1.2442, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.962962962962963e-05, "loss": 1.2462, "step": 1400 }, { "epoch": 1.45, "learning_rate": 2.8703703703703706e-05, "loss": 1.2083, "step": 1450 }, { "epoch": 1.5, "learning_rate": 2.777777777777778e-05, "loss": 1.2251, "step": 1500 }, { "epoch": 1.55, "learning_rate": 2.6851851851851855e-05, "loss": 1.2295, "step": 1550 }, { "epoch": 1.6, "learning_rate": 2.5925925925925925e-05, "loss": 1.2295, "step": 1600 }, { "epoch": 1.65, "learning_rate": 2.5e-05, "loss": 1.221, "step": 1650 }, { "epoch": 1.7, "learning_rate": 2.4074074074074074e-05, "loss": 1.2059, "step": 1700 }, { "epoch": 1.75, "learning_rate": 2.314814814814815e-05, "loss": 1.1994, "step": 1750 }, { "epoch": 1.8, "learning_rate": 2.2222222222222223e-05, "loss": 1.19, "step": 1800 }, { "epoch": 1.85, "learning_rate": 2.1296296296296296e-05, "loss": 1.2009, "step": 1850 }, { "epoch": 1.9, "learning_rate": 2.037037037037037e-05, "loss": 1.1991, "step": 1900 }, { "epoch": 1.95, "learning_rate": 1.9444444444444445e-05, "loss": 1.1965, "step": 1950 }, { "epoch": 2.0, "learning_rate": 1.8518518518518518e-05, "loss": 1.1969, "step": 2000 }, { "epoch": 2.0, "eval_gen_len": 20.0, "eval_loss": 1.1954196691513062, "eval_rouge1": 13.7683, "eval_rouge2": 9.4853, "eval_rougeL": 13.3723, "eval_rougeLsum": 13.6358, "eval_runtime": 191.2997, "eval_samples_per_second": 10.455, "eval_steps_per_second": 0.653, "step": 2000 }, { "epoch": 2.05, "learning_rate": 1.7592592592592595e-05, "loss": 1.1336, "step": 2050 }, { "epoch": 2.1, "learning_rate": 1.6666666666666667e-05, "loss": 1.1388, "step": 2100 }, { "epoch": 2.15, "learning_rate": 1.574074074074074e-05, "loss": 1.1374, "step": 2150 }, { "epoch": 2.2, "learning_rate": 1.4814814814814815e-05, "loss": 1.1564, "step": 2200 }, { "epoch": 2.25, "learning_rate": 1.388888888888889e-05, "loss": 1.1277, "step": 2250 }, { "epoch": 2.3, "learning_rate": 1.2962962962962962e-05, "loss": 1.1429, "step": 2300 }, { "epoch": 2.35, "learning_rate": 1.2037037037037037e-05, "loss": 1.1384, "step": 2350 }, { "epoch": 2.4, "learning_rate": 1.1111111111111112e-05, "loss": 1.1301, "step": 2400 }, { "epoch": 2.45, "learning_rate": 1.0185185185185185e-05, "loss": 1.1333, "step": 2450 }, { "epoch": 2.5, "learning_rate": 9.259259259259259e-06, "loss": 1.1182, "step": 2500 }, { "epoch": 2.55, "learning_rate": 8.333333333333334e-06, "loss": 1.1341, "step": 2550 }, { "epoch": 2.6, "learning_rate": 7.4074074074074075e-06, "loss": 1.1139, "step": 2600 }, { "epoch": 2.65, "learning_rate": 6.481481481481481e-06, "loss": 1.1337, "step": 2650 }, { "epoch": 2.7, "learning_rate": 5.555555555555556e-06, "loss": 1.1241, "step": 2700 }, { "epoch": 2.75, "learning_rate": 4.6296296296296296e-06, "loss": 1.1179, "step": 2750 }, { "epoch": 2.8, "learning_rate": 3.7037037037037037e-06, "loss": 1.1187, "step": 2800 }, { "epoch": 2.85, "learning_rate": 2.777777777777778e-06, "loss": 1.1222, "step": 2850 }, { "epoch": 2.9, "learning_rate": 1.8518518518518519e-06, "loss": 1.1274, "step": 2900 }, { "epoch": 2.95, "learning_rate": 9.259259259259259e-07, "loss": 1.1162, "step": 2950 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 1.1146, "step": 3000 }, { "epoch": 3.0, "eval_gen_len": 20.0, "eval_loss": 1.1659743785858154, "eval_rouge1": 14.9893, "eval_rouge2": 10.2707, "eval_rougeL": 14.4389, "eval_rougeLsum": 14.7875, "eval_runtime": 191.6936, "eval_samples_per_second": 10.433, "eval_steps_per_second": 0.652, "step": 3000 } ], "logging_steps": 50, "max_steps": 3000, "num_train_epochs": 3, "save_steps": 500, "total_flos": 6499688477884416.0, "trial_name": null, "trial_params": null }