{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 68220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.73, "learning_rate": 2.780123131046614e-05, "loss": 0.635, "step": 5000 }, { "epoch": 0.73, "eval_gen_len": 34.1, "eval_loss": 0.5430836081504822, "eval_rouge1": 19.0735, "eval_rouge2": 11.8399, "eval_rougeL": 18.8386, "eval_rougeLsum": 18.9348, "eval_runtime": 3057.941, "eval_samples_per_second": 1.116, "step": 5000 }, { "epoch": 1.47, "learning_rate": 2.5602462620932278e-05, "loss": 0.4721, "step": 10000 }, { "epoch": 1.47, "eval_gen_len": 34.1, "eval_loss": 0.5901889204978943, "eval_rouge1": 18.6178, "eval_rouge2": 11.5042, "eval_rougeL": 18.3666, "eval_rougeLsum": 18.4882, "eval_runtime": 3066.7913, "eval_samples_per_second": 1.113, "step": 10000 }, { "epoch": 2.2, "learning_rate": 2.3403693931398417e-05, "loss": 0.3803, "step": 15000 }, { "epoch": 2.2, "eval_gen_len": 33.1, "eval_loss": 0.6489335298538208, "eval_rouge1": 20.6093, "eval_rouge2": 12.6294, "eval_rougeL": 20.2763, "eval_rougeLsum": 20.3561, "eval_runtime": 2950.4654, "eval_samples_per_second": 1.157, "step": 15000 }, { "epoch": 2.93, "learning_rate": 2.120492524186456e-05, "loss": 0.3072, "step": 20000 }, { "epoch": 2.93, "eval_gen_len": 33.0, "eval_loss": 0.6617271304130554, "eval_rouge1": 21.175, "eval_rouge2": 12.9525, "eval_rougeL": 20.85, "eval_rougeLsum": 20.9327, "eval_runtime": 2933.4173, "eval_samples_per_second": 1.163, "step": 20000 }, { "epoch": 3.66, "learning_rate": 1.9006156552330694e-05, "loss": 0.225, "step": 25000 }, { "epoch": 3.66, "eval_gen_len": 33.7, "eval_loss": 0.7911365628242493, "eval_rouge1": 20.0159, "eval_rouge2": 12.3403, "eval_rougeL": 19.7354, "eval_rougeLsum": 19.831, "eval_runtime": 3007.359, "eval_samples_per_second": 1.135, "step": 25000 }, { "epoch": 4.4, "learning_rate": 1.6807387862796836e-05, "loss": 0.1862, "step": 30000 }, { "epoch": 4.4, "eval_gen_len": 33.5, "eval_loss": 0.9180275201797485, "eval_rouge1": 20.6775, "eval_rouge2": 12.58, "eval_rougeL": 20.3712, "eval_rougeLsum": 20.4162, "eval_runtime": 2990.0617, "eval_samples_per_second": 1.141, "step": 30000 }, { "epoch": 5.13, "learning_rate": 1.4608619173262973e-05, "loss": 0.1559, "step": 35000 }, { "epoch": 5.13, "eval_gen_len": 33.2, "eval_loss": 1.0511494874954224, "eval_rouge1": 20.3652, "eval_rouge2": 12.4459, "eval_rougeL": 20.0386, "eval_rougeLsum": 20.1371, "eval_runtime": 2942.5442, "eval_samples_per_second": 1.16, "step": 35000 }, { "epoch": 5.86, "learning_rate": 1.2409850483729111e-05, "loss": 0.1256, "step": 40000 }, { "epoch": 5.86, "eval_gen_len": 33.2, "eval_loss": 1.1184989213943481, "eval_rouge1": 20.6117, "eval_rouge2": 12.6157, "eval_rougeL": 20.3466, "eval_rougeLsum": 20.4447, "eval_runtime": 2945.923, "eval_samples_per_second": 1.159, "step": 40000 }, { "epoch": 6.6, "learning_rate": 1.0211081794195251e-05, "loss": 0.1044, "step": 45000 }, { "epoch": 6.6, "eval_gen_len": 33.5, "eval_loss": 1.262742280960083, "eval_rouge1": 20.4935, "eval_rouge2": 12.393, "eval_rougeL": 20.1894, "eval_rougeLsum": 20.2828, "eval_runtime": 2982.3339, "eval_samples_per_second": 1.144, "step": 45000 }, { "epoch": 7.33, "learning_rate": 8.01231310466139e-06, "loss": 0.0844, "step": 50000 }, { "epoch": 7.33, "eval_gen_len": 33.3, "eval_loss": 1.5101187229156494, "eval_rouge1": 21.5004, "eval_rouge2": 12.9803, "eval_rougeL": 21.2348, "eval_rougeLsum": 21.3046, "eval_runtime": 2964.055, "eval_samples_per_second": 1.151, "step": 50000 }, { "epoch": 8.06, "learning_rate": 5.813544415127529e-06, "loss": 0.075, "step": 55000 }, { "epoch": 8.06, "eval_gen_len": 33.1, "eval_loss": 1.5069748163223267, "eval_rouge1": 20.8058, "eval_rouge2": 12.4951, "eval_rougeL": 20.5311, "eval_rougeLsum": 20.6195, "eval_runtime": 2941.4323, "eval_samples_per_second": 1.16, "step": 55000 }, { "epoch": 8.8, "learning_rate": 3.6147757255936676e-06, "loss": 0.0612, "step": 60000 }, { "epoch": 8.8, "eval_gen_len": 33.2, "eval_loss": 1.579475998878479, "eval_rouge1": 20.9465, "eval_rouge2": 12.6351, "eval_rougeL": 20.6478, "eval_rougeLsum": 20.7292, "eval_runtime": 2955.2469, "eval_samples_per_second": 1.155, "step": 60000 }, { "epoch": 9.53, "learning_rate": 1.4160070360598064e-06, "loss": 0.0519, "step": 65000 }, { "epoch": 9.53, "eval_gen_len": 33.2, "eval_loss": 1.6573169231414795, "eval_rouge1": 20.9384, "eval_rouge2": 12.6776, "eval_rougeL": 20.6249, "eval_rougeLsum": 20.7352, "eval_runtime": 2938.7831, "eval_samples_per_second": 1.161, "step": 65000 }, { "epoch": 10.0, "step": 68220, "total_flos": 5.6695121367374234e+17, "train_runtime": 78827.7854, "train_samples_per_second": 0.865 } ], "max_steps": 68220, "num_train_epochs": 10, "total_flos": 5.6695121367374234e+17, "trial_name": null, "trial_params": null }