{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.32827994984611875, "eval_steps": 720, "global_step": 5760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 0.3646579682826996, "learning_rate": 9.795674104471232e-05, "loss": 0.0784, "step": 720 }, { "epoch": 0.04, "eval_bertscore": 0.7196829915046692, "eval_loss": 0.10667099058628082, "eval_rouge1": 0.5424204681399662, "eval_rouge2": 0.3232621307936826, "eval_rougeL": 0.4168316330799737, "eval_rougeLsum": 0.41874171810922023, "eval_runtime": 50.0749, "eval_samples_per_second": 1.078, "eval_steps_per_second": 0.28, "step": 720 }, { "epoch": 0.08, "grad_norm": null, "learning_rate": 9.590778262232482e-05, "loss": 0.0858, "step": 1440 }, { "epoch": 0.08, "eval_bertscore": 0.6401040554046631, "eval_loss": 0.18695296347141266, "eval_rouge1": 0.4271646967472444, "eval_rouge2": 0.1659738534008396, "eval_rougeL": 0.28939047028045584, "eval_rougeLsum": 0.28900025002813623, "eval_runtime": 43.3827, "eval_samples_per_second": 1.245, "eval_steps_per_second": 0.323, "step": 1440 }, { "epoch": 0.12, "grad_norm": 0.3369450867176056, "learning_rate": 9.38559744663874e-05, "loss": 0.0887, "step": 2160 }, { "epoch": 0.12, "eval_bertscore": 0.7218716740608215, "eval_loss": 0.1099499836564064, "eval_rouge1": 0.5524760977800962, "eval_rouge2": 0.330958882130141, "eval_rougeL": 0.4259051117722474, "eval_rougeLsum": 0.42919968644337714, "eval_runtime": 47.1009, "eval_samples_per_second": 1.146, "eval_steps_per_second": 0.297, "step": 2160 }, { "epoch": 0.16, "grad_norm": 0.36330386996269226, "learning_rate": 9.180416631044998e-05, "loss": 0.0933, "step": 2880 }, { "epoch": 0.16, "eval_bertscore": 0.7138540744781494, "eval_loss": 0.1120433360338211, "eval_rouge1": 0.5390251173909333, "eval_rouge2": 0.31146103356099275, "eval_rougeL": 0.41387331131584476, "eval_rougeLsum": 0.4135311998867288, "eval_runtime": 46.0195, 
"eval_samples_per_second": 1.173, "eval_steps_per_second": 0.304, "step": 2880 }, { "epoch": 0.21, "grad_norm": 0.35335826873779297, "learning_rate": 8.975235815451256e-05, "loss": 0.0862, "step": 3600 }, { "epoch": 0.21, "eval_bertscore": 0.7312328219413757, "eval_loss": 0.10742145031690598, "eval_rouge1": 0.5669051469800668, "eval_rouge2": 0.3520718989284114, "eval_rougeL": 0.44425293679893696, "eval_rougeLsum": 0.44695777725182906, "eval_runtime": 45.6563, "eval_samples_per_second": 1.183, "eval_steps_per_second": 0.307, "step": 3600 }, { "epoch": 0.25, "grad_norm": 0.37303468585014343, "learning_rate": 8.770339973212504e-05, "loss": 0.0911, "step": 4320 }, { "epoch": 0.25, "eval_bertscore": 0.7096375226974487, "eval_loss": 0.11186981201171875, "eval_rouge1": 0.5333109271513738, "eval_rouge2": 0.30338933797823264, "eval_rougeL": 0.4003430978893555, "eval_rougeLsum": 0.4005552066640774, "eval_runtime": 43.9874, "eval_samples_per_second": 1.228, "eval_steps_per_second": 0.318, "step": 4320 }, { "epoch": 0.29, "grad_norm": 0.30176717042922974, "learning_rate": 8.565159157618762e-05, "loss": 0.0893, "step": 5040 }, { "epoch": 0.29, "eval_bertscore": 0.7181953191757202, "eval_loss": 0.10768043249845505, "eval_rouge1": 0.5493961807050101, "eval_rouge2": 0.3304637891082364, "eval_rougeL": 0.42184528341938216, "eval_rougeLsum": 0.4241212110511772, "eval_runtime": 45.8651, "eval_samples_per_second": 1.177, "eval_steps_per_second": 0.305, "step": 5040 }, { "epoch": 0.33, "grad_norm": 0.35905396938323975, "learning_rate": 8.35997834202502e-05, "loss": 0.0895, "step": 5760 }, { "epoch": 0.33, "eval_bertscore": 0.7235485315322876, "eval_loss": 0.10892420262098312, "eval_rouge1": 0.5464214086441583, "eval_rouge2": 0.33055059501726136, "eval_rougeL": 0.4309761675921166, "eval_rougeLsum": 0.43243067509302885, "eval_runtime": 46.6469, "eval_samples_per_second": 1.158, "eval_steps_per_second": 0.3, "step": 5760 } ], "logging_steps": 720, "max_steps": 35092, 
"num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2880, "total_flos": 4.993009003266048e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }