{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.0, "eval_steps": 500, "global_step": 8336, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.96, "learning_rate": 1.9360204734484968e-05, "loss": 2.0443, "step": 500 }, { "epoch": 1.0, "eval_f1": 0.9049, "eval_gen_len": 28.363272727272726, "eval_loss": 1.7046316862106323, "eval_precision": 0.9041, "eval_recall": 0.9061, "eval_rouge1": 0.4488, "eval_rouge2": 0.203, "eval_rougeL": 0.3633, "eval_rougeLsum": 0.3633, "eval_runtime": 577.3748, "eval_samples_per_second": 4.763, "eval_steps_per_second": 0.298, "step": 521 }, { "epoch": 1.92, "learning_rate": 1.872040946896993e-05, "loss": 1.7826, "step": 1000 }, { "epoch": 2.0, "eval_f1": 0.9072, "eval_gen_len": 28.19490909090909, "eval_loss": 1.6347475051879883, "eval_precision": 0.9062, "eval_recall": 0.9085, "eval_rouge1": 0.4616, "eval_rouge2": 0.2133, "eval_rougeL": 0.3761, "eval_rougeLsum": 0.3758, "eval_runtime": 524.6485, "eval_samples_per_second": 5.242, "eval_steps_per_second": 0.328, "step": 1042 }, { "epoch": 2.88, "learning_rate": 1.8080614203454897e-05, "loss": 1.7134, "step": 1500 }, { "epoch": 3.0, "eval_f1": 0.9084, "eval_gen_len": 28.521818181818183, "eval_loss": 1.5991039276123047, "eval_precision": 0.9072, "eval_recall": 0.91, "eval_rouge1": 0.4683, "eval_rouge2": 0.2186, "eval_rougeL": 0.3824, "eval_rougeLsum": 0.3822, "eval_runtime": 539.0316, "eval_samples_per_second": 5.102, "eval_steps_per_second": 0.319, "step": 1563 }, { "epoch": 3.84, "learning_rate": 1.744081893793986e-05, "loss": 1.6664, "step": 2000 }, { "epoch": 4.0, "eval_f1": 0.9096, "eval_gen_len": 28.24981818181818, "eval_loss": 1.5767467021942139, "eval_precision": 0.9087, "eval_recall": 0.9109, "eval_rouge1": 0.4738, "eval_rouge2": 0.2233, "eval_rougeL": 0.3878, "eval_rougeLsum": 0.3876, "eval_runtime": 529.9968, "eval_samples_per_second": 5.189, "eval_steps_per_second": 0.325, "step": 2084 }, { "epoch": 4.8, "learning_rate": 1.6801023672424827e-05, "loss": 1.6296, "step": 2500 }, { "epoch": 5.0, "eval_f1": 0.9103, "eval_gen_len": 28.239636363636365, "eval_loss": 1.5595422983169556, "eval_precision": 0.9093, "eval_recall": 0.9117, "eval_rouge1": 0.4775, "eval_rouge2": 0.2265, "eval_rougeL": 0.3911, "eval_rougeLsum": 0.391, "eval_runtime": 526.5193, "eval_samples_per_second": 5.223, "eval_steps_per_second": 0.327, "step": 2605 }, { "epoch": 5.76, "learning_rate": 1.616122840690979e-05, "loss": 1.5984, "step": 3000 }, { "epoch": 6.0, "eval_f1": 0.9109, "eval_gen_len": 28.28, "eval_loss": 1.5468252897262573, "eval_precision": 0.9098, "eval_recall": 0.9124, "eval_rouge1": 0.4805, "eval_rouge2": 0.2284, "eval_rougeL": 0.3941, "eval_rougeLsum": 0.3938, "eval_runtime": 512.3397, "eval_samples_per_second": 5.368, "eval_steps_per_second": 0.336, "step": 3126 }, { "epoch": 6.72, "learning_rate": 1.5521433141394756e-05, "loss": 1.5738, "step": 3500 }, { "epoch": 7.0, "eval_f1": 0.9113, "eval_gen_len": 27.837818181818182, "eval_loss": 1.5370196104049683, "eval_precision": 0.9105, "eval_recall": 0.9124, "eval_rouge1": 0.4807, "eval_rouge2": 0.2296, "eval_rougeL": 0.3945, "eval_rougeLsum": 0.3946, "eval_runtime": 509.6023, "eval_samples_per_second": 5.396, "eval_steps_per_second": 0.338, "step": 3647 }, { "epoch": 7.68, "learning_rate": 1.0403071017274472e-05, "loss": 1.5476, "step": 4000 }, { "epoch": 8.0, "eval_f1": 0.9114, "eval_gen_len": 27.736363636363638, "eval_loss": 1.530755639076233, "eval_precision": 0.9108, "eval_recall": 0.9125, "eval_rouge1": 0.4823, "eval_rouge2": 0.2315, "eval_rougeL": 0.3963, "eval_rougeLsum": 0.3965, "eval_runtime": 510.2185, "eval_samples_per_second": 5.39, "eval_steps_per_second": 0.337, "step": 4168 }, { "epoch": 8.64, "learning_rate": 9.203454894433782e-06, "loss": 1.535, "step": 4500 }, { "epoch": 9.0, "eval_f1": 0.9116, "eval_gen_len": 27.653454545454544, "eval_loss": 1.5260871648788452, "eval_precision": 0.911, "eval_recall": 0.9125, "eval_rouge1": 0.4829, "eval_rouge2": 0.2309, "eval_rougeL": 0.3974, "eval_rougeLsum": 0.3974, "eval_runtime": 503.2649, "eval_samples_per_second": 5.464, "eval_steps_per_second": 0.342, "step": 4689 }, { "epoch": 9.6, "learning_rate": 8.003838771593091e-06, "loss": 1.52, "step": 5000 }, { "epoch": 10.0, "eval_f1": 0.9117, "eval_gen_len": 27.816, "eval_loss": 1.52312433719635, "eval_precision": 0.911, "eval_recall": 0.9128, "eval_rouge1": 0.4847, "eval_rouge2": 0.2332, "eval_rougeL": 0.3992, "eval_rougeLsum": 0.3993, "eval_runtime": 522.9989, "eval_samples_per_second": 5.258, "eval_steps_per_second": 0.329, "step": 5210 }, { "epoch": 10.56, "learning_rate": 6.8042226487524e-06, "loss": 1.5145, "step": 5500 }, { "epoch": 11.0, "eval_f1": 0.9121, "eval_gen_len": 27.360363636363637, "eval_loss": 1.519996166229248, "eval_precision": 0.9119, "eval_recall": 0.9127, "eval_rouge1": 0.4851, "eval_rouge2": 0.2339, "eval_rougeL": 0.4004, "eval_rougeLsum": 0.4006, "eval_runtime": 501.564, "eval_samples_per_second": 5.483, "eval_steps_per_second": 0.343, "step": 5731 }, { "epoch": 11.52, "learning_rate": 5.6046065259117085e-06, "loss": 1.5028, "step": 6000 }, { "epoch": 12.0, "eval_f1": 0.9122, "eval_gen_len": 27.462545454545456, "eval_loss": 1.5178437232971191, "eval_precision": 0.9118, "eval_recall": 0.9129, "eval_rouge1": 0.4858, "eval_rouge2": 0.2345, "eval_rougeL": 0.4001, "eval_rougeLsum": 0.4002, "eval_runtime": 501.8356, "eval_samples_per_second": 5.48, "eval_steps_per_second": 0.343, "step": 6252 }, { "epoch": 12.48, "learning_rate": 4.404990403071018e-06, "loss": 1.4946, "step": 6500 }, { "epoch": 13.0, "eval_f1": 0.9121, "eval_gen_len": 27.67890909090909, "eval_loss": 1.5164216756820679, "eval_precision": 0.9115, "eval_recall": 0.9131, "eval_rouge1": 0.4859, "eval_rouge2": 0.2341, "eval_rougeL": 0.4004, "eval_rougeLsum": 0.4005, "eval_runtime": 506.9944, "eval_samples_per_second": 5.424, "eval_steps_per_second": 0.339, "step": 6773 }, { "epoch": 13.44, "learning_rate": 3.2053742802303266e-06, "loss": 1.4877, "step": 7000 }, { "epoch": 14.0, "eval_f1": 0.9123, "eval_gen_len": 27.580363636363636, "eval_loss": 1.515085220336914, "eval_precision": 0.9119, "eval_recall": 0.9131, "eval_rouge1": 0.4868, "eval_rouge2": 0.235, "eval_rougeL": 0.4013, "eval_rougeLsum": 0.4013, "eval_runtime": 510.129, "eval_samples_per_second": 5.391, "eval_steps_per_second": 0.337, "step": 7294 }, { "epoch": 14.4, "learning_rate": 2.0057581573896352e-06, "loss": 1.4855, "step": 7500 }, { "epoch": 15.0, "eval_f1": 0.9122, "eval_gen_len": 27.584363636363637, "eval_loss": 1.5146222114562988, "eval_precision": 0.9117, "eval_recall": 0.9131, "eval_rouge1": 0.4863, "eval_rouge2": 0.2349, "eval_rougeL": 0.4014, "eval_rougeLsum": 0.4016, "eval_runtime": 507.3504, "eval_samples_per_second": 5.42, "eval_steps_per_second": 0.339, "step": 7815 }, { "epoch": 15.36, "learning_rate": 8.061420345489445e-07, "loss": 1.4782, "step": 8000 }, { "epoch": 16.0, "eval_f1": 0.9122, "eval_gen_len": 27.571636363636365, "eval_loss": 1.514625906944275, "eval_precision": 0.9118, "eval_recall": 0.9131, "eval_rouge1": 0.4863, "eval_rouge2": 0.2348, "eval_rougeL": 0.4011, "eval_rougeLsum": 0.4012, "eval_runtime": 505.7467, "eval_samples_per_second": 5.438, "eval_steps_per_second": 0.34, "step": 8336 }, { "epoch": 16.0, "step": 8336, "total_flos": 1.1557816346520453e+18, "train_loss": 0.8733468595713434, "train_runtime": 22758.1802, "train_samples_per_second": 35.152, "train_steps_per_second": 0.366 } ], "logging_steps": 500, "max_steps": 8336, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 1.1557816346520453e+18, "train_batch_size": 24, "trial_name": null, "trial_params": null }