{
  "best_metric": 0.3871,
  "best_model_checkpoint": "logs_OPT/ybelkada/opt-350m-lora/checkpoint-1308",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 2616,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "grad_norm": 12307.013671875,
      "learning_rate": 9.885321100917432e-05,
      "loss": 2.8491,
      "step": 50
    },
    {
      "epoch": 0.23,
      "grad_norm": 12963.0400390625,
      "learning_rate": 9.770642201834863e-05,
      "loss": 2.789,
      "step": 100
    },
    {
      "epoch": 0.34,
      "grad_norm": 11882.9541015625,
      "learning_rate": 9.655963302752295e-05,
      "loss": 2.7515,
      "step": 150
    },
    {
      "epoch": 0.46,
      "grad_norm": 11366.3447265625,
      "learning_rate": 9.541284403669725e-05,
      "loss": 2.7392,
      "step": 200
    },
    {
      "epoch": 0.57,
      "grad_norm": 11676.5546875,
      "learning_rate": 9.426605504587156e-05,
      "loss": 2.7247,
      "step": 250
    },
    {
      "epoch": 0.69,
      "grad_norm": 11853.2451171875,
      "learning_rate": 9.311926605504587e-05,
      "loss": 2.7219,
      "step": 300
    },
    {
      "epoch": 0.8,
      "grad_norm": 11694.083984375,
      "learning_rate": 9.197247706422019e-05,
      "loss": 2.7155,
      "step": 350
    },
    {
      "epoch": 0.92,
      "grad_norm": 11730.7705078125,
      "learning_rate": 9.08256880733945e-05,
      "loss": 2.7153,
      "step": 400
    },
    {
      "epoch": 1.0,
      "eval_bert-score-f1": 0.05165594071149826,
      "eval_bert-score-precision": 0.009280605241656303,
      "eval_bert-score-recall": 0.09162026643753052,
      "eval_gen_len": 1.0,
      "eval_loss": 2.908092975616455,
      "eval_rouge1": 0.5438,
      "eval_rouge2": 0.1898,
      "eval_rougeL": 0.3779,
      "eval_rougeLsum": 0.5253,
      "eval_runtime": 1.9821,
      "eval_samples_per_second": 0.505,
      "eval_steps_per_second": 0.505,
      "step": 436
    },
    {
      "epoch": 1.03,
      "grad_norm": 11704.4638671875,
      "learning_rate": 8.967889908256882e-05,
      "loss": 2.7075,
      "step": 450
    },
    {
      "epoch": 1.15,
      "grad_norm": 11411.2861328125,
      "learning_rate": 8.853211009174312e-05,
      "loss": 2.7113,
      "step": 500
    },
    {
      "epoch": 1.26,
      "grad_norm": 11756.0546875,
      "learning_rate": 8.738532110091744e-05,
      "loss": 2.7055,
      "step": 550
    },
    {
      "epoch": 1.38,
      "grad_norm": 12264.1328125,
      "learning_rate": 8.623853211009176e-05,
      "loss": 2.7084,
      "step": 600
    },
    {
      "epoch": 1.49,
      "grad_norm": 12191.14453125,
      "learning_rate": 8.509174311926605e-05,
      "loss": 2.7043,
      "step": 650
    },
    {
      "epoch": 1.61,
      "grad_norm": 12316.2470703125,
      "learning_rate": 8.394495412844037e-05,
      "loss": 2.7002,
      "step": 700
    },
    {
      "epoch": 1.72,
      "grad_norm": 12066.7626953125,
      "learning_rate": 8.279816513761469e-05,
      "loss": 2.7009,
      "step": 750
    },
    {
      "epoch": 1.83,
      "grad_norm": 12753.3515625,
      "learning_rate": 8.165137614678899e-05,
      "loss": 2.6945,
      "step": 800
    },
    {
      "epoch": 1.95,
      "grad_norm": 13497.3369140625,
      "learning_rate": 8.050458715596331e-05,
      "loss": 2.7001,
      "step": 850
    },
    {
      "epoch": 2.0,
      "eval_bert-score-f1": 0.055162131786346436,
      "eval_bert-score-precision": 0.015301413834095001,
      "eval_bert-score-recall": 0.09254243224859238,
      "eval_gen_len": 1.0,
      "eval_loss": 2.8941495418548584,
      "eval_rouge1": 0.553,
      "eval_rouge2": 0.213,
      "eval_rougeL": 0.3825,
      "eval_rougeLsum": 0.5346,
      "eval_runtime": 0.2895,
      "eval_samples_per_second": 3.454,
      "eval_steps_per_second": 3.454,
      "step": 872
    },
    {
      "epoch": 2.06,
      "grad_norm": 12265.7421875,
      "learning_rate": 7.935779816513761e-05,
      "loss": 2.6953,
      "step": 900
    },
    {
      "epoch": 2.18,
      "grad_norm": 12369.7255859375,
      "learning_rate": 7.821100917431193e-05,
      "loss": 2.7006,
      "step": 950
    },
    {
      "epoch": 2.29,
      "grad_norm": 12023.724609375,
      "learning_rate": 7.706422018348625e-05,
      "loss": 2.6967,
      "step": 1000
    },
    {
      "epoch": 2.41,
      "grad_norm": 12832.462890625,
      "learning_rate": 7.591743119266055e-05,
      "loss": 2.6898,
      "step": 1050
    },
    {
      "epoch": 2.52,
      "grad_norm": 12962.9765625,
      "learning_rate": 7.477064220183486e-05,
      "loss": 2.6945,
      "step": 1100
    },
    {
      "epoch": 2.64,
      "grad_norm": 12374.212890625,
      "learning_rate": 7.362385321100918e-05,
      "loss": 2.6897,
      "step": 1150
    },
    {
      "epoch": 2.75,
      "grad_norm": 12595.6669921875,
      "learning_rate": 7.247706422018348e-05,
      "loss": 2.6926,
      "step": 1200
    },
    {
      "epoch": 2.87,
      "grad_norm": 12504.0595703125,
      "learning_rate": 7.13302752293578e-05,
      "loss": 2.6891,
      "step": 1250
    },
    {
      "epoch": 2.98,
      "grad_norm": 12908.3642578125,
      "learning_rate": 7.018348623853212e-05,
      "loss": 2.695,
      "step": 1300
    },
    {
      "epoch": 3.0,
      "eval_bert-score-f1": 0.06703756004571915,
      "eval_bert-score-precision": 0.030596459284424782,
      "eval_bert-score-recall": 0.10093347728252411,
      "eval_gen_len": 1.0,
      "eval_loss": 2.8901169300079346,
      "eval_rouge1": 0.5484,
      "eval_rouge2": 0.2083,
      "eval_rougeL": 0.3871,
      "eval_rougeLsum": 0.53,
      "eval_runtime": 0.3104,
      "eval_samples_per_second": 3.221,
      "eval_steps_per_second": 3.221,
      "step": 1308
    },
    {
      "epoch": 3.1,
      "grad_norm": 12873.3271484375,
      "learning_rate": 6.903669724770642e-05,
      "loss": 2.6891,
      "step": 1350
    },
    {
      "epoch": 3.21,
      "grad_norm": 12999.7646484375,
      "learning_rate": 6.788990825688074e-05,
      "loss": 2.6907,
      "step": 1400
    },
    {
      "epoch": 3.33,
      "grad_norm": 13313.30078125,
      "learning_rate": 6.674311926605505e-05,
      "loss": 2.6826,
      "step": 1450
    },
    {
      "epoch": 3.44,
      "grad_norm": 13062.2900390625,
      "learning_rate": 6.559633027522935e-05,
      "loss": 2.6885,
      "step": 1500
    },
    {
      "epoch": 3.56,
      "grad_norm": 12728.0263671875,
      "learning_rate": 6.444954128440367e-05,
      "loss": 2.6919,
      "step": 1550
    },
    {
      "epoch": 3.67,
      "grad_norm": 12907.9404296875,
      "learning_rate": 6.330275229357799e-05,
      "loss": 2.693,
      "step": 1600
    },
    {
      "epoch": 3.78,
      "grad_norm": 12380.5546875,
      "learning_rate": 6.21559633027523e-05,
      "loss": 2.6879,
      "step": 1650
    },
    {
      "epoch": 3.9,
      "grad_norm": 12330.32421875,
      "learning_rate": 6.1009174311926606e-05,
      "loss": 2.6867,
      "step": 1700
    },
    {
      "epoch": 4.0,
      "eval_bert-score-f1": 0.06787349283695221,
      "eval_bert-score-precision": 0.02922355942428112,
      "eval_bert-score-recall": 0.10404554754495621,
      "eval_gen_len": 1.0,
      "eval_loss": 2.888526678085327,
      "eval_rouge1": 0.5346,
      "eval_rouge2": 0.1991,
      "eval_rougeL": 0.3779,
      "eval_rougeLsum": 0.5161,
      "eval_runtime": 0.3143,
      "eval_samples_per_second": 3.182,
      "eval_steps_per_second": 3.182,
      "step": 1744
    },
    {
      "epoch": 4.01,
      "grad_norm": 13034.7822265625,
      "learning_rate": 5.9862385321100924e-05,
      "loss": 2.6872,
      "step": 1750
    },
    {
      "epoch": 4.13,
      "grad_norm": 12861.380859375,
      "learning_rate": 5.8715596330275236e-05,
      "loss": 2.6879,
      "step": 1800
    },
    {
      "epoch": 4.24,
      "grad_norm": 12877.626953125,
      "learning_rate": 5.756880733944955e-05,
      "loss": 2.6949,
      "step": 1850
    },
    {
      "epoch": 4.36,
      "grad_norm": 13110.791015625,
      "learning_rate": 5.642201834862385e-05,
      "loss": 2.6798,
      "step": 1900
    },
    {
      "epoch": 4.47,
      "grad_norm": 13043.751953125,
      "learning_rate": 5.5275229357798164e-05,
      "loss": 2.6877,
      "step": 1950
    },
    {
      "epoch": 4.59,
      "grad_norm": 13076.4658203125,
      "learning_rate": 5.4128440366972475e-05,
      "loss": 2.6807,
      "step": 2000
    },
    {
      "epoch": 4.7,
      "grad_norm": 12260.9541015625,
      "learning_rate": 5.2981651376146794e-05,
      "loss": 2.6806,
      "step": 2050
    },
    {
      "epoch": 4.82,
      "grad_norm": 12857.9267578125,
      "learning_rate": 5.1834862385321105e-05,
      "loss": 2.6861,
      "step": 2100
    },
    {
      "epoch": 4.93,
      "grad_norm": 13119.2509765625,
      "learning_rate": 5.068807339449542e-05,
      "loss": 2.689,
      "step": 2150
    },
    {
      "epoch": 5.0,
      "eval_bert-score-f1": 0.07044733315706253,
      "eval_bert-score-precision": 0.030797742307186127,
      "eval_bert-score-recall": 0.1076577752828598,
      "eval_gen_len": 1.0,
      "eval_loss": 2.8861072063446045,
      "eval_rouge1": 0.5484,
      "eval_rouge2": 0.2037,
      "eval_rougeL": 0.3871,
      "eval_rougeLsum": 0.53,
      "eval_runtime": 0.285,
      "eval_samples_per_second": 3.509,
      "eval_steps_per_second": 3.509,
      "step": 2180
    },
    {
      "epoch": 5.05,
      "grad_norm": 13340.306640625,
      "learning_rate": 4.954128440366973e-05,
      "loss": 2.6858,
      "step": 2200
    },
    {
      "epoch": 5.16,
      "grad_norm": 12620.7880859375,
      "learning_rate": 4.839449541284404e-05,
      "loss": 2.6824,
      "step": 2250
    },
    {
      "epoch": 5.28,
      "grad_norm": 13280.1328125,
      "learning_rate": 4.724770642201835e-05,
      "loss": 2.6859,
      "step": 2300
    },
    {
      "epoch": 5.39,
      "grad_norm": 12882.2529296875,
      "learning_rate": 4.610091743119266e-05,
      "loss": 2.6786,
      "step": 2350
    },
    {
      "epoch": 5.5,
      "grad_norm": 13004.6220703125,
      "learning_rate": 4.4954128440366975e-05,
      "loss": 2.6792,
      "step": 2400
    },
    {
      "epoch": 5.62,
      "grad_norm": 12589.1669921875,
      "learning_rate": 4.3807339449541286e-05,
      "loss": 2.6816,
      "step": 2450
    },
    {
      "epoch": 5.73,
      "grad_norm": 12607.59765625,
      "learning_rate": 4.26605504587156e-05,
      "loss": 2.6892,
      "step": 2500
    },
    {
      "epoch": 5.85,
      "grad_norm": 14242.603515625,
      "learning_rate": 4.151376146788991e-05,
      "loss": 2.6828,
      "step": 2550
    },
    {
      "epoch": 5.96,
      "grad_norm": 12890.3896484375,
      "learning_rate": 4.036697247706422e-05,
      "loss": 2.684,
      "step": 2600
    },
    {
      "epoch": 6.0,
      "eval_bert-score-f1": 0.059426210820674896,
      "eval_bert-score-precision": 0.024532141163945198,
      "eval_bert-score-recall": 0.09170834720134735,
      "eval_gen_len": 1.0,
      "eval_loss": 2.8896830081939697,
      "eval_rouge1": 0.53,
      "eval_rouge2": 0.1944,
      "eval_rougeL": 0.3733,
      "eval_rougeLsum": 0.5115,
      "eval_runtime": 0.2673,
      "eval_samples_per_second": 3.741,
      "eval_steps_per_second": 3.741,
      "step": 2616
    }
  ],
  "logging_steps": 50,
  "max_steps": 4360,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 3.000077993558016e+17,
  "train_batch_size": 180,
  "trial_name": null,
  "trial_params": null
}