{ "best_metric": 0.3871, "best_model_checkpoint": "logs_OPT/ybelkada/opt-350m-lora/checkpoint-1308", "epoch": 6.0, "eval_steps": 500, "global_step": 2616, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "grad_norm": 12307.013671875, "learning_rate": 9.885321100917432e-05, "loss": 2.8491, "step": 50 }, { "epoch": 0.23, "grad_norm": 12963.0400390625, "learning_rate": 9.770642201834863e-05, "loss": 2.789, "step": 100 }, { "epoch": 0.34, "grad_norm": 11882.9541015625, "learning_rate": 9.655963302752295e-05, "loss": 2.7515, "step": 150 }, { "epoch": 0.46, "grad_norm": 11366.3447265625, "learning_rate": 9.541284403669725e-05, "loss": 2.7392, "step": 200 }, { "epoch": 0.57, "grad_norm": 11676.5546875, "learning_rate": 9.426605504587156e-05, "loss": 2.7247, "step": 250 }, { "epoch": 0.69, "grad_norm": 11853.2451171875, "learning_rate": 9.311926605504587e-05, "loss": 2.7219, "step": 300 }, { "epoch": 0.8, "grad_norm": 11694.083984375, "learning_rate": 9.197247706422019e-05, "loss": 2.7155, "step": 350 }, { "epoch": 0.92, "grad_norm": 11730.7705078125, "learning_rate": 9.08256880733945e-05, "loss": 2.7153, "step": 400 }, { "epoch": 1.0, "eval_bert-score-f1": 0.05165594071149826, "eval_bert-score-precision": 0.009280605241656303, "eval_bert-score-recall": 0.09162026643753052, "eval_gen_len": 1.0, "eval_loss": 2.908092975616455, "eval_rouge1": 0.5438, "eval_rouge2": 0.1898, "eval_rougeL": 0.3779, "eval_rougeLsum": 0.5253, "eval_runtime": 1.9821, "eval_samples_per_second": 0.505, "eval_steps_per_second": 0.505, "step": 436 }, { "epoch": 1.03, "grad_norm": 11704.4638671875, "learning_rate": 8.967889908256882e-05, "loss": 2.7075, "step": 450 }, { "epoch": 1.15, "grad_norm": 11411.2861328125, "learning_rate": 8.853211009174312e-05, "loss": 2.7113, "step": 500 }, { "epoch": 1.26, "grad_norm": 11756.0546875, "learning_rate": 8.738532110091744e-05, "loss": 2.7055, "step": 550 }, { "epoch": 1.38, "grad_norm": 12264.1328125, "learning_rate": 8.623853211009176e-05, "loss": 2.7084, "step": 600 }, { "epoch": 1.49, "grad_norm": 12191.14453125, "learning_rate": 8.509174311926605e-05, "loss": 2.7043, "step": 650 }, { "epoch": 1.61, "grad_norm": 12316.2470703125, "learning_rate": 8.394495412844037e-05, "loss": 2.7002, "step": 700 }, { "epoch": 1.72, "grad_norm": 12066.7626953125, "learning_rate": 8.279816513761469e-05, "loss": 2.7009, "step": 750 }, { "epoch": 1.83, "grad_norm": 12753.3515625, "learning_rate": 8.165137614678899e-05, "loss": 2.6945, "step": 800 }, { "epoch": 1.95, "grad_norm": 13497.3369140625, "learning_rate": 8.050458715596331e-05, "loss": 2.7001, "step": 850 }, { "epoch": 2.0, "eval_bert-score-f1": 0.055162131786346436, "eval_bert-score-precision": 0.015301413834095001, "eval_bert-score-recall": 0.09254243224859238, "eval_gen_len": 1.0, "eval_loss": 2.8941495418548584, "eval_rouge1": 0.553, "eval_rouge2": 0.213, "eval_rougeL": 0.3825, "eval_rougeLsum": 0.5346, "eval_runtime": 0.2895, "eval_samples_per_second": 3.454, "eval_steps_per_second": 3.454, "step": 872 }, { "epoch": 2.06, "grad_norm": 12265.7421875, "learning_rate": 7.935779816513761e-05, "loss": 2.6953, "step": 900 }, { "epoch": 2.18, "grad_norm": 12369.7255859375, "learning_rate": 7.821100917431193e-05, "loss": 2.7006, "step": 950 }, { "epoch": 2.29, "grad_norm": 12023.724609375, "learning_rate": 7.706422018348625e-05, "loss": 2.6967, "step": 1000 }, { "epoch": 2.41, "grad_norm": 12832.462890625, "learning_rate": 7.591743119266055e-05, "loss": 2.6898, "step": 
1050 }, { "epoch": 2.52, "grad_norm": 12962.9765625, "learning_rate": 7.477064220183486e-05, "loss": 2.6945, "step": 1100 }, { "epoch": 2.64, "grad_norm": 12374.212890625, "learning_rate": 7.362385321100918e-05, "loss": 2.6897, "step": 1150 }, { "epoch": 2.75, "grad_norm": 12595.6669921875, "learning_rate": 7.247706422018348e-05, "loss": 2.6926, "step": 1200 }, { "epoch": 2.87, "grad_norm": 12504.0595703125, "learning_rate": 7.13302752293578e-05, "loss": 2.6891, "step": 1250 }, { "epoch": 2.98, "grad_norm": 12908.3642578125, "learning_rate": 7.018348623853212e-05, "loss": 2.695, "step": 1300 }, { "epoch": 3.0, "eval_bert-score-f1": 0.06703756004571915, "eval_bert-score-precision": 0.030596459284424782, "eval_bert-score-recall": 0.10093347728252411, "eval_gen_len": 1.0, "eval_loss": 2.8901169300079346, "eval_rouge1": 0.5484, "eval_rouge2": 0.2083, "eval_rougeL": 0.3871, "eval_rougeLsum": 0.53, "eval_runtime": 0.3104, "eval_samples_per_second": 3.221, "eval_steps_per_second": 3.221, "step": 1308 }, { "epoch": 3.1, "grad_norm": 12873.3271484375, "learning_rate": 6.903669724770642e-05, "loss": 2.6891, "step": 1350 }, { "epoch": 3.21, "grad_norm": 12999.7646484375, "learning_rate": 6.788990825688074e-05, "loss": 2.6907, "step": 1400 }, { "epoch": 3.33, "grad_norm": 13313.30078125, "learning_rate": 6.674311926605505e-05, "loss": 2.6826, "step": 1450 }, { "epoch": 3.44, "grad_norm": 13062.2900390625, "learning_rate": 6.559633027522935e-05, "loss": 2.6885, "step": 1500 }, { "epoch": 3.56, "grad_norm": 12728.0263671875, "learning_rate": 6.444954128440367e-05, "loss": 2.6919, "step": 1550 }, { "epoch": 3.67, "grad_norm": 12907.9404296875, "learning_rate": 6.330275229357799e-05, "loss": 2.693, "step": 1600 }, { "epoch": 3.78, "grad_norm": 12380.5546875, "learning_rate": 6.21559633027523e-05, "loss": 2.6879, "step": 1650 }, { "epoch": 3.9, "grad_norm": 12330.32421875, "learning_rate": 6.1009174311926606e-05, "loss": 2.6867, "step": 1700 }, { "epoch": 4.0, "eval_bert-score-f1": 0.06787349283695221, "eval_bert-score-precision": 0.02922355942428112, "eval_bert-score-recall": 0.10404554754495621, "eval_gen_len": 1.0, "eval_loss": 2.888526678085327, "eval_rouge1": 0.5346, "eval_rouge2": 0.1991, "eval_rougeL": 0.3779, "eval_rougeLsum": 0.5161, "eval_runtime": 0.3143, "eval_samples_per_second": 3.182, "eval_steps_per_second": 3.182, "step": 1744 }, { "epoch": 4.01, "grad_norm": 13034.7822265625, "learning_rate": 5.9862385321100924e-05, "loss": 2.6872, "step": 1750 }, { "epoch": 4.13, "grad_norm": 12861.380859375, "learning_rate": 5.8715596330275236e-05, "loss": 2.6879, "step": 1800 }, { "epoch": 4.24, "grad_norm": 12877.626953125, "learning_rate": 5.756880733944955e-05, "loss": 2.6949, "step": 1850 }, { "epoch": 4.36, "grad_norm": 13110.791015625, "learning_rate": 5.642201834862385e-05, "loss": 2.6798, "step": 1900 }, { "epoch": 4.47, "grad_norm": 13043.751953125, "learning_rate": 5.5275229357798164e-05, "loss": 2.6877, "step": 1950 }, { "epoch": 4.59, "grad_norm": 13076.4658203125, "learning_rate": 5.4128440366972475e-05, "loss": 2.6807, "step": 2000 }, { "epoch": 4.7, "grad_norm": 12260.9541015625, "learning_rate": 5.2981651376146794e-05, "loss": 2.6806, "step": 2050 }, { "epoch": 4.82, "grad_norm": 12857.9267578125, "learning_rate": 5.1834862385321105e-05, "loss": 2.6861, "step": 2100 }, { "epoch": 4.93, "grad_norm": 13119.2509765625, "learning_rate": 5.068807339449542e-05, "loss": 2.689, "step": 2150 }, { "epoch": 5.0, "eval_bert-score-f1": 0.07044733315706253, "eval_bert-score-precision": 
0.030797742307186127, "eval_bert-score-recall": 0.1076577752828598, "eval_gen_len": 1.0, "eval_loss": 2.8861072063446045, "eval_rouge1": 0.5484, "eval_rouge2": 0.2037, "eval_rougeL": 0.3871, "eval_rougeLsum": 0.53, "eval_runtime": 0.285, "eval_samples_per_second": 3.509, "eval_steps_per_second": 3.509, "step": 2180 }, { "epoch": 5.05, "grad_norm": 13340.306640625, "learning_rate": 4.954128440366973e-05, "loss": 2.6858, "step": 2200 }, { "epoch": 5.16, "grad_norm": 12620.7880859375, "learning_rate": 4.839449541284404e-05, "loss": 2.6824, "step": 2250 }, { "epoch": 5.28, "grad_norm": 13280.1328125, "learning_rate": 4.724770642201835e-05, "loss": 2.6859, "step": 2300 }, { "epoch": 5.39, "grad_norm": 12882.2529296875, "learning_rate": 4.610091743119266e-05, "loss": 2.6786, "step": 2350 }, { "epoch": 5.5, "grad_norm": 13004.6220703125, "learning_rate": 4.4954128440366975e-05, "loss": 2.6792, "step": 2400 }, { "epoch": 5.62, "grad_norm": 12589.1669921875, "learning_rate": 4.3807339449541286e-05, "loss": 2.6816, "step": 2450 }, { "epoch": 5.73, "grad_norm": 12607.59765625, "learning_rate": 4.26605504587156e-05, "loss": 2.6892, "step": 2500 }, { "epoch": 5.85, "grad_norm": 14242.603515625, "learning_rate": 4.151376146788991e-05, "loss": 2.6828, "step": 2550 }, { "epoch": 5.96, "grad_norm": 12890.3896484375, "learning_rate": 4.036697247706422e-05, "loss": 2.684, "step": 2600 }, { "epoch": 6.0, "eval_bert-score-f1": 0.059426210820674896, "eval_bert-score-precision": 0.024532141163945198, "eval_bert-score-recall": 0.09170834720134735, "eval_gen_len": 1.0, "eval_loss": 2.8896830081939697, "eval_rouge1": 0.53, "eval_rouge2": 0.1944, "eval_rougeL": 0.3733, "eval_rougeLsum": 0.5115, "eval_runtime": 0.2673, "eval_samples_per_second": 3.741, "eval_steps_per_second": 3.741, "step": 2616 } ], "logging_steps": 50, "max_steps": 4360, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.000077993558016e+17, "train_batch_size": 180, "trial_name": null, "trial_params": null }
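
The state above is the `trainer_state.json` the Hugging Face `Trainer` writes alongside each checkpoint; it records the per-step training loss and the per-epoch evaluation metrics (ROUGE, BERTScore, eval loss) for the `ybelkada/opt-350m-lora` run. Below is a minimal sketch for inspecting it, assuming the file is saved locally as `trainer_state.json`; the filename and the printed summary format are illustrative. The `best_metric` value (0.3871) matches `eval_rougeL` at the reported best checkpoint, so the script labels it that way, but the selection metric is inferred from the log rather than stated in it.

```python
# Minimal sketch: summarize a Hugging Face trainer_state.json.
# Assumptions (not part of the original log): the file path "trainer_state.json"
# and the decision to treat best_metric as eval_rougeL.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training-loss records carry "loss"; per-epoch evaluation records carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"best metric (appears to be eval_rougeL): {state['best_metric']}")
print(f"best checkpoint: {state['best_model_checkpoint']}")
print(f"logged training steps: {len(train_logs)}, evaluation points: {len(eval_logs)}")

# One line per evaluation epoch, pulling the metrics actually present in the log.
for e in eval_logs:
    print(
        f"epoch {e['epoch']:.0f} | step {e['step']:>4} | "
        f"eval_loss {e['eval_loss']:.4f} | rougeL {e['eval_rougeL']:.4f} | "
        f"BERTScore-F1 {e['eval_bert-score-f1']:.4f}"
    )
```

Running this against the log above would show the eval loss bottoming out around epoch 5 (2.8861 at step 2180) while `eval_rougeL` peaks at 0.3871 at steps 1308 and 2180, consistent with checkpoint-1308 being kept as the best model.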