opt-350m-lora / trainer_state.json
{
"best_metric": 0.3871,
"best_model_checkpoint": "logs_OPT/ybelkada/opt-350m-lora/checkpoint-1308",
"epoch": 6.0,
"eval_steps": 500,
"global_step": 2616,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11,
"grad_norm": 12307.013671875,
"learning_rate": 9.885321100917432e-05,
"loss": 2.8491,
"step": 50
},
{
"epoch": 0.23,
"grad_norm": 12963.0400390625,
"learning_rate": 9.770642201834863e-05,
"loss": 2.789,
"step": 100
},
{
"epoch": 0.34,
"grad_norm": 11882.9541015625,
"learning_rate": 9.655963302752295e-05,
"loss": 2.7515,
"step": 150
},
{
"epoch": 0.46,
"grad_norm": 11366.3447265625,
"learning_rate": 9.541284403669725e-05,
"loss": 2.7392,
"step": 200
},
{
"epoch": 0.57,
"grad_norm": 11676.5546875,
"learning_rate": 9.426605504587156e-05,
"loss": 2.7247,
"step": 250
},
{
"epoch": 0.69,
"grad_norm": 11853.2451171875,
"learning_rate": 9.311926605504587e-05,
"loss": 2.7219,
"step": 300
},
{
"epoch": 0.8,
"grad_norm": 11694.083984375,
"learning_rate": 9.197247706422019e-05,
"loss": 2.7155,
"step": 350
},
{
"epoch": 0.92,
"grad_norm": 11730.7705078125,
"learning_rate": 9.08256880733945e-05,
"loss": 2.7153,
"step": 400
},
{
"epoch": 1.0,
"eval_bert-score-f1": 0.05165594071149826,
"eval_bert-score-precision": 0.009280605241656303,
"eval_bert-score-recall": 0.09162026643753052,
"eval_gen_len": 1.0,
"eval_loss": 2.908092975616455,
"eval_rouge1": 0.5438,
"eval_rouge2": 0.1898,
"eval_rougeL": 0.3779,
"eval_rougeLsum": 0.5253,
"eval_runtime": 1.9821,
"eval_samples_per_second": 0.505,
"eval_steps_per_second": 0.505,
"step": 436
},
{
"epoch": 1.03,
"grad_norm": 11704.4638671875,
"learning_rate": 8.967889908256882e-05,
"loss": 2.7075,
"step": 450
},
{
"epoch": 1.15,
"grad_norm": 11411.2861328125,
"learning_rate": 8.853211009174312e-05,
"loss": 2.7113,
"step": 500
},
{
"epoch": 1.26,
"grad_norm": 11756.0546875,
"learning_rate": 8.738532110091744e-05,
"loss": 2.7055,
"step": 550
},
{
"epoch": 1.38,
"grad_norm": 12264.1328125,
"learning_rate": 8.623853211009176e-05,
"loss": 2.7084,
"step": 600
},
{
"epoch": 1.49,
"grad_norm": 12191.14453125,
"learning_rate": 8.509174311926605e-05,
"loss": 2.7043,
"step": 650
},
{
"epoch": 1.61,
"grad_norm": 12316.2470703125,
"learning_rate": 8.394495412844037e-05,
"loss": 2.7002,
"step": 700
},
{
"epoch": 1.72,
"grad_norm": 12066.7626953125,
"learning_rate": 8.279816513761469e-05,
"loss": 2.7009,
"step": 750
},
{
"epoch": 1.83,
"grad_norm": 12753.3515625,
"learning_rate": 8.165137614678899e-05,
"loss": 2.6945,
"step": 800
},
{
"epoch": 1.95,
"grad_norm": 13497.3369140625,
"learning_rate": 8.050458715596331e-05,
"loss": 2.7001,
"step": 850
},
{
"epoch": 2.0,
"eval_bert-score-f1": 0.055162131786346436,
"eval_bert-score-precision": 0.015301413834095001,
"eval_bert-score-recall": 0.09254243224859238,
"eval_gen_len": 1.0,
"eval_loss": 2.8941495418548584,
"eval_rouge1": 0.553,
"eval_rouge2": 0.213,
"eval_rougeL": 0.3825,
"eval_rougeLsum": 0.5346,
"eval_runtime": 0.2895,
"eval_samples_per_second": 3.454,
"eval_steps_per_second": 3.454,
"step": 872
},
{
"epoch": 2.06,
"grad_norm": 12265.7421875,
"learning_rate": 7.935779816513761e-05,
"loss": 2.6953,
"step": 900
},
{
"epoch": 2.18,
"grad_norm": 12369.7255859375,
"learning_rate": 7.821100917431193e-05,
"loss": 2.7006,
"step": 950
},
{
"epoch": 2.29,
"grad_norm": 12023.724609375,
"learning_rate": 7.706422018348625e-05,
"loss": 2.6967,
"step": 1000
},
{
"epoch": 2.41,
"grad_norm": 12832.462890625,
"learning_rate": 7.591743119266055e-05,
"loss": 2.6898,
"step": 1050
},
{
"epoch": 2.52,
"grad_norm": 12962.9765625,
"learning_rate": 7.477064220183486e-05,
"loss": 2.6945,
"step": 1100
},
{
"epoch": 2.64,
"grad_norm": 12374.212890625,
"learning_rate": 7.362385321100918e-05,
"loss": 2.6897,
"step": 1150
},
{
"epoch": 2.75,
"grad_norm": 12595.6669921875,
"learning_rate": 7.247706422018348e-05,
"loss": 2.6926,
"step": 1200
},
{
"epoch": 2.87,
"grad_norm": 12504.0595703125,
"learning_rate": 7.13302752293578e-05,
"loss": 2.6891,
"step": 1250
},
{
"epoch": 2.98,
"grad_norm": 12908.3642578125,
"learning_rate": 7.018348623853212e-05,
"loss": 2.695,
"step": 1300
},
{
"epoch": 3.0,
"eval_bert-score-f1": 0.06703756004571915,
"eval_bert-score-precision": 0.030596459284424782,
"eval_bert-score-recall": 0.10093347728252411,
"eval_gen_len": 1.0,
"eval_loss": 2.8901169300079346,
"eval_rouge1": 0.5484,
"eval_rouge2": 0.2083,
"eval_rougeL": 0.3871,
"eval_rougeLsum": 0.53,
"eval_runtime": 0.3104,
"eval_samples_per_second": 3.221,
"eval_steps_per_second": 3.221,
"step": 1308
},
{
"epoch": 3.1,
"grad_norm": 12873.3271484375,
"learning_rate": 6.903669724770642e-05,
"loss": 2.6891,
"step": 1350
},
{
"epoch": 3.21,
"grad_norm": 12999.7646484375,
"learning_rate": 6.788990825688074e-05,
"loss": 2.6907,
"step": 1400
},
{
"epoch": 3.33,
"grad_norm": 13313.30078125,
"learning_rate": 6.674311926605505e-05,
"loss": 2.6826,
"step": 1450
},
{
"epoch": 3.44,
"grad_norm": 13062.2900390625,
"learning_rate": 6.559633027522935e-05,
"loss": 2.6885,
"step": 1500
},
{
"epoch": 3.56,
"grad_norm": 12728.0263671875,
"learning_rate": 6.444954128440367e-05,
"loss": 2.6919,
"step": 1550
},
{
"epoch": 3.67,
"grad_norm": 12907.9404296875,
"learning_rate": 6.330275229357799e-05,
"loss": 2.693,
"step": 1600
},
{
"epoch": 3.78,
"grad_norm": 12380.5546875,
"learning_rate": 6.21559633027523e-05,
"loss": 2.6879,
"step": 1650
},
{
"epoch": 3.9,
"grad_norm": 12330.32421875,
"learning_rate": 6.1009174311926606e-05,
"loss": 2.6867,
"step": 1700
},
{
"epoch": 4.0,
"eval_bert-score-f1": 0.06787349283695221,
"eval_bert-score-precision": 0.02922355942428112,
"eval_bert-score-recall": 0.10404554754495621,
"eval_gen_len": 1.0,
"eval_loss": 2.888526678085327,
"eval_rouge1": 0.5346,
"eval_rouge2": 0.1991,
"eval_rougeL": 0.3779,
"eval_rougeLsum": 0.5161,
"eval_runtime": 0.3143,
"eval_samples_per_second": 3.182,
"eval_steps_per_second": 3.182,
"step": 1744
},
{
"epoch": 4.01,
"grad_norm": 13034.7822265625,
"learning_rate": 5.9862385321100924e-05,
"loss": 2.6872,
"step": 1750
},
{
"epoch": 4.13,
"grad_norm": 12861.380859375,
"learning_rate": 5.8715596330275236e-05,
"loss": 2.6879,
"step": 1800
},
{
"epoch": 4.24,
"grad_norm": 12877.626953125,
"learning_rate": 5.756880733944955e-05,
"loss": 2.6949,
"step": 1850
},
{
"epoch": 4.36,
"grad_norm": 13110.791015625,
"learning_rate": 5.642201834862385e-05,
"loss": 2.6798,
"step": 1900
},
{
"epoch": 4.47,
"grad_norm": 13043.751953125,
"learning_rate": 5.5275229357798164e-05,
"loss": 2.6877,
"step": 1950
},
{
"epoch": 4.59,
"grad_norm": 13076.4658203125,
"learning_rate": 5.4128440366972475e-05,
"loss": 2.6807,
"step": 2000
},
{
"epoch": 4.7,
"grad_norm": 12260.9541015625,
"learning_rate": 5.2981651376146794e-05,
"loss": 2.6806,
"step": 2050
},
{
"epoch": 4.82,
"grad_norm": 12857.9267578125,
"learning_rate": 5.1834862385321105e-05,
"loss": 2.6861,
"step": 2100
},
{
"epoch": 4.93,
"grad_norm": 13119.2509765625,
"learning_rate": 5.068807339449542e-05,
"loss": 2.689,
"step": 2150
},
{
"epoch": 5.0,
"eval_bert-score-f1": 0.07044733315706253,
"eval_bert-score-precision": 0.030797742307186127,
"eval_bert-score-recall": 0.1076577752828598,
"eval_gen_len": 1.0,
"eval_loss": 2.8861072063446045,
"eval_rouge1": 0.5484,
"eval_rouge2": 0.2037,
"eval_rougeL": 0.3871,
"eval_rougeLsum": 0.53,
"eval_runtime": 0.285,
"eval_samples_per_second": 3.509,
"eval_steps_per_second": 3.509,
"step": 2180
},
{
"epoch": 5.05,
"grad_norm": 13340.306640625,
"learning_rate": 4.954128440366973e-05,
"loss": 2.6858,
"step": 2200
},
{
"epoch": 5.16,
"grad_norm": 12620.7880859375,
"learning_rate": 4.839449541284404e-05,
"loss": 2.6824,
"step": 2250
},
{
"epoch": 5.28,
"grad_norm": 13280.1328125,
"learning_rate": 4.724770642201835e-05,
"loss": 2.6859,
"step": 2300
},
{
"epoch": 5.39,
"grad_norm": 12882.2529296875,
"learning_rate": 4.610091743119266e-05,
"loss": 2.6786,
"step": 2350
},
{
"epoch": 5.5,
"grad_norm": 13004.6220703125,
"learning_rate": 4.4954128440366975e-05,
"loss": 2.6792,
"step": 2400
},
{
"epoch": 5.62,
"grad_norm": 12589.1669921875,
"learning_rate": 4.3807339449541286e-05,
"loss": 2.6816,
"step": 2450
},
{
"epoch": 5.73,
"grad_norm": 12607.59765625,
"learning_rate": 4.26605504587156e-05,
"loss": 2.6892,
"step": 2500
},
{
"epoch": 5.85,
"grad_norm": 14242.603515625,
"learning_rate": 4.151376146788991e-05,
"loss": 2.6828,
"step": 2550
},
{
"epoch": 5.96,
"grad_norm": 12890.3896484375,
"learning_rate": 4.036697247706422e-05,
"loss": 2.684,
"step": 2600
},
{
"epoch": 6.0,
"eval_bert-score-f1": 0.059426210820674896,
"eval_bert-score-precision": 0.024532141163945198,
"eval_bert-score-recall": 0.09170834720134735,
"eval_gen_len": 1.0,
"eval_loss": 2.8896830081939697,
"eval_rouge1": 0.53,
"eval_rouge2": 0.1944,
"eval_rougeL": 0.3733,
"eval_rougeLsum": 0.5115,
"eval_runtime": 0.2673,
"eval_samples_per_second": 3.741,
"eval_steps_per_second": 3.741,
"step": 2616
}
],
"logging_steps": 50,
"max_steps": 4360,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 3.000077993558016e+17,
"train_batch_size": 180,
"trial_name": null,
"trial_params": null
}
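
For context, below is a minimal sketch (not part of the uploaded checkpoint itself) of how a trainer_state.json like the one above, as produced by the Hugging Face Transformers Trainer, can be loaded and summarized in Python. The local file path is an assumption; the keys used (best_model_checkpoint, best_metric, log_history, loss, eval_loss, eval_rougeL, step) are the ones present in the JSON above.

import json

# Assumed local path to the file shown above; adjust to where the checkpoint folder was downloaded.
STATE_PATH = "opt-350m-lora/trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

print("best checkpoint:", state["best_model_checkpoint"])
# best_metric appears to correspond to eval_rougeL (0.3871 at checkpoint-1308 in the log above).
print("best metric:", state["best_metric"])

# log_history mixes per-logging-step training entries (with "loss")
# and per-epoch evaluation entries (with "eval_loss"); split them apart.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

for e in eval_log:
    print(f"epoch {e['epoch']:.0f}  step {e['step']:>5}  "
          f"eval_loss {e['eval_loss']:.4f}  eval_rougeL {e['eval_rougeL']:.4f}")

print(f"{len(train_log)} training entries, {len(eval_log)} eval entries")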