{ "best_metric": 0.020906077697873116, "best_model_checkpoint": "LLMNIDS-t5base-1/checkpoint-5043", "epoch": 3.0, "eval_steps": 500, "global_step": 5043, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 6.331108093261719, "learning_rate": 1.6336633663366337e-05, "loss": 5.5187, "step": 170 }, { "epoch": 0.2, "grad_norm": 4.446777820587158, "learning_rate": 3.3168316831683175e-05, "loss": 0.8703, "step": 340 }, { "epoch": 0.3, "grad_norm": 3.2873497009277344, "learning_rate": 5e-05, "loss": 0.3904, "step": 510 }, { "epoch": 0.4, "grad_norm": 5.13348913192749, "learning_rate": 4.813794623182018e-05, "loss": 0.1922, "step": 680 }, { "epoch": 0.51, "grad_norm": 1.8617521524429321, "learning_rate": 4.626487439400617e-05, "loss": 0.1091, "step": 850 }, { "epoch": 0.61, "grad_norm": 0.5739182829856873, "learning_rate": 4.439180255619216e-05, "loss": 0.0807, "step": 1020 }, { "epoch": 0.71, "grad_norm": 3.6188409328460693, "learning_rate": 4.251873071837814e-05, "loss": 0.0719, "step": 1190 }, { "epoch": 0.81, "grad_norm": 0.20058074593544006, "learning_rate": 4.064565888056412e-05, "loss": 0.0449, "step": 1360 }, { "epoch": 0.91, "grad_norm": 0.7106176614761353, "learning_rate": 3.877258704275011e-05, "loss": 0.0512, "step": 1530 }, { "epoch": 1.0, "eval_gen_len": 4.5944, "eval_loss": 0.044217173010110855, "eval_rouge1": 96.4965, "eval_rouge2": 43.8853, "eval_rougeL": 96.5111, "eval_rougeLsum": 96.4892, "eval_runtime": 56.362, "eval_samples_per_second": 121.287, "eval_steps_per_second": 7.594, "step": 1681 }, { "epoch": 1.01, "grad_norm": 0.03131448104977608, "learning_rate": 3.68995152049361e-05, "loss": 0.0372, "step": 1700 }, { "epoch": 1.11, "grad_norm": 0.7714371085166931, "learning_rate": 3.502644336712208e-05, "loss": 0.035, "step": 1870 }, { "epoch": 1.21, "grad_norm": 0.47366881370544434, "learning_rate": 3.316438959894227e-05, "loss": 0.0305, "step": 2040 }, { "epoch": 1.31, "grad_norm": 1.289427399635315, "learning_rate": 3.129131776112825e-05, "loss": 0.0295, "step": 2210 }, { "epoch": 1.42, "grad_norm": 1.7327721118927002, "learning_rate": 2.9440282062582637e-05, "loss": 0.0358, "step": 2380 }, { "epoch": 1.52, "grad_norm": 0.13644327223300934, "learning_rate": 2.756721022476862e-05, "loss": 0.0283, "step": 2550 }, { "epoch": 1.62, "grad_norm": 0.032617200165987015, "learning_rate": 2.5694138386954607e-05, "loss": 0.0213, "step": 2720 }, { "epoch": 1.72, "grad_norm": 0.21351170539855957, "learning_rate": 2.3821066549140592e-05, "loss": 0.0268, "step": 2890 }, { "epoch": 1.82, "grad_norm": 0.03627165034413338, "learning_rate": 2.1947994711326578e-05, "loss": 0.0196, "step": 3060 }, { "epoch": 1.92, "grad_norm": 0.08273334801197052, "learning_rate": 2.0074922873512563e-05, "loss": 0.0153, "step": 3230 }, { "epoch": 2.0, "eval_gen_len": 4.559, "eval_loss": 0.026822404935956, "eval_rouge1": 98.2592, "eval_rouge2": 44.3827, "eval_rougeL": 98.2592, "eval_rougeLsum": 98.2665, "eval_runtime": 55.7197, "eval_samples_per_second": 122.686, "eval_steps_per_second": 7.681, "step": 3362 }, { "epoch": 2.02, "grad_norm": 0.009718131273984909, "learning_rate": 1.8201851035698548e-05, "loss": 0.0173, "step": 3400 }, { "epoch": 2.12, "grad_norm": 0.4162699282169342, "learning_rate": 1.632877919788453e-05, "loss": 0.0202, "step": 3570 }, { "epoch": 2.22, "grad_norm": 0.6483927965164185, "learning_rate": 1.4455707360070516e-05, "loss": 0.0166, "step": 3740 }, { "epoch": 2.33, "grad_norm": 0.029726264998316765, "learning_rate": 1.25826355222565e-05, "loss": 0.0138, "step": 3910 }, { "epoch": 2.43, "grad_norm": 0.008093880489468575, "learning_rate": 1.0709563684442487e-05, "loss": 0.0124, "step": 4080 }, { "epoch": 2.53, "grad_norm": 0.06707121431827545, "learning_rate": 8.836491846628472e-06, "loss": 0.0193, "step": 4250 }, { "epoch": 2.63, "grad_norm": 0.49983274936676025, "learning_rate": 6.963420008814456e-06, "loss": 0.0142, "step": 4420 }, { "epoch": 2.73, "grad_norm": 0.009119812399148941, "learning_rate": 5.090348171000441e-06, "loss": 0.0118, "step": 4590 }, { "epoch": 2.83, "grad_norm": 0.155787855386734, "learning_rate": 3.217276333186426e-06, "loss": 0.0235, "step": 4760 }, { "epoch": 2.93, "grad_norm": 0.08707331120967865, "learning_rate": 1.3442044953724108e-06, "loss": 0.0135, "step": 4930 }, { "epoch": 3.0, "eval_gen_len": 4.5541, "eval_loss": 0.020906077697873116, "eval_rouge1": 98.3836, "eval_rouge2": 44.4266, "eval_rougeL": 98.3836, "eval_rougeLsum": 98.3909, "eval_runtime": 55.8601, "eval_samples_per_second": 122.377, "eval_steps_per_second": 7.662, "step": 5043 } ], "logging_steps": 170, "max_steps": 5043, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 6138752255262720.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }