{ "best_metric": 2.0858356952667236, "best_model_checkpoint": "./models/checkpoint-70000", "epoch": 0.9752155923041558, "global_step": 70000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 4.7678058113561536e-05, "loss": 2.5513, "step": 10000 }, { "epoch": 0.14, "eval_gen_len": 19.0, "eval_loss": 2.201458692550659, "eval_rouge1": 12.1725, "eval_rouge2": 2.4216, "eval_rougeL": 12.077, "eval_rougeLsum": 12.1411, "eval_runtime": 1560.686, "eval_samples_per_second": 8.565, "eval_steps_per_second": 2.141, "step": 10000 }, { "epoch": 0.28, "learning_rate": 4.535611622712307e-05, "loss": 2.4881, "step": 20000 }, { "epoch": 0.28, "eval_gen_len": 19.0, "eval_loss": 2.1785953044891357, "eval_rouge1": 12.7586, "eval_rouge2": 2.6851, "eval_rougeL": 12.6518, "eval_rougeLsum": 12.733, "eval_runtime": 1548.5251, "eval_samples_per_second": 8.633, "eval_steps_per_second": 2.158, "step": 20000 }, { "epoch": 0.42, "learning_rate": 4.3034174340684604e-05, "loss": 2.459, "step": 30000 }, { "epoch": 0.42, "eval_gen_len": 19.0, "eval_loss": 2.1513352394104004, "eval_rouge1": 12.7004, "eval_rouge2": 2.6318, "eval_rougeL": 12.5832, "eval_rougeLsum": 12.668, "eval_runtime": 1564.1475, "eval_samples_per_second": 8.547, "eval_steps_per_second": 2.137, "step": 30000 }, { "epoch": 0.56, "learning_rate": 4.071223245424614e-05, "loss": 2.4361, "step": 40000 }, { "epoch": 0.56, "eval_gen_len": 19.0, "eval_loss": 2.127916097640991, "eval_rouge1": 13.1946, "eval_rouge2": 2.8216, "eval_rougeL": 13.0738, "eval_rougeLsum": 13.1592, "eval_runtime": 1584.478, "eval_samples_per_second": 8.437, "eval_steps_per_second": 2.109, "step": 40000 }, { "epoch": 0.7, "learning_rate": 3.8390290567807665e-05, "loss": 2.4613, "step": 50000 }, { "epoch": 0.7, "eval_gen_len": 19.0, "eval_loss": 2.098001003265381, "eval_rouge1": 12.8818, "eval_rouge2": 2.8541, "eval_rougeL": 12.7484, "eval_rougeLsum": 12.8289, "eval_runtime": 1565.1942, "eval_samples_per_second": 8.541, "eval_steps_per_second": 2.135, "step": 50000 }, { "epoch": 0.84, "learning_rate": 3.60683486813692e-05, "loss": 2.462, "step": 60000 }, { "epoch": 0.84, "eval_gen_len": 19.0, "eval_loss": 2.0897421836853027, "eval_rouge1": 13.1141, "eval_rouge2": 2.8014, "eval_rougeL": 12.9971, "eval_rougeLsum": 13.0729, "eval_runtime": 1561.4037, "eval_samples_per_second": 8.562, "eval_steps_per_second": 2.14, "step": 60000 }, { "epoch": 0.98, "learning_rate": 3.374640679493074e-05, "loss": 2.4478, "step": 70000 }, { "epoch": 0.98, "eval_gen_len": 19.0, "eval_loss": 2.0858356952667236, "eval_rouge1": 13.4344, "eval_rouge2": 2.8499, "eval_rougeL": 13.3259, "eval_rougeLsum": 13.3837, "eval_runtime": 1559.9208, "eval_samples_per_second": 8.57, "eval_steps_per_second": 2.142, "step": 70000 } ], "max_steps": 215337, "num_train_epochs": 3, "total_flos": 1.4804990931554304e+17, "trial_name": null, "trial_params": null }