LLMNIDS-t5small-1 / checkpoint-5043 /trainer_state.json
tali1's picture
Upload folder using huggingface_hub
cce4a70 verified
raw
history blame contribute delete
No virus
6.21 kB
{
"best_metric": 0.03447870910167694,
"best_model_checkpoint": "LLMNIDS-t5small-1/checkpoint-5043",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 5043,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"grad_norm": 13.822306632995605,
"learning_rate": 1.6336633663366337e-05,
"loss": 6.4708,
"step": 170
},
{
"epoch": 0.2,
"grad_norm": 4.269347190856934,
"learning_rate": 3.306930693069307e-05,
"loss": 2.544,
"step": 340
},
{
"epoch": 0.3,
"grad_norm": 13.6782865524292,
"learning_rate": 4.9900990099009906e-05,
"loss": 0.8408,
"step": 510
},
{
"epoch": 0.4,
"grad_norm": 10.320158004760742,
"learning_rate": 4.813794623182018e-05,
"loss": 0.5769,
"step": 680
},
{
"epoch": 0.51,
"grad_norm": 4.96146297454834,
"learning_rate": 4.626487439400617e-05,
"loss": 0.4061,
"step": 850
},
{
"epoch": 0.61,
"grad_norm": 2.3415141105651855,
"learning_rate": 4.439180255619216e-05,
"loss": 0.3239,
"step": 1020
},
{
"epoch": 0.71,
"grad_norm": 5.0349297523498535,
"learning_rate": 4.251873071837814e-05,
"loss": 0.2748,
"step": 1190
},
{
"epoch": 0.81,
"grad_norm": 2.790640354156494,
"learning_rate": 4.064565888056412e-05,
"loss": 0.1759,
"step": 1360
},
{
"epoch": 0.91,
"grad_norm": 17.055940628051758,
"learning_rate": 3.877258704275011e-05,
"loss": 0.155,
"step": 1530
},
{
"epoch": 1.0,
"eval_gen_len": 4.5983,
"eval_loss": 0.07687737792730331,
"eval_rouge1": 94.273,
"eval_rouge2": 41.5009,
"eval_rougeL": 94.2803,
"eval_rougeLsum": 94.273,
"eval_runtime": 31.2003,
"eval_samples_per_second": 219.101,
"eval_steps_per_second": 13.718,
"step": 1681
},
{
"epoch": 1.01,
"grad_norm": 3.797430992126465,
"learning_rate": 3.68995152049361e-05,
"loss": 0.1099,
"step": 1700
},
{
"epoch": 1.11,
"grad_norm": 2.853536605834961,
"learning_rate": 3.502644336712208e-05,
"loss": 0.0839,
"step": 1870
},
{
"epoch": 1.21,
"grad_norm": 2.6505563259124756,
"learning_rate": 3.315337152930807e-05,
"loss": 0.0958,
"step": 2040
},
{
"epoch": 1.31,
"grad_norm": 4.214806079864502,
"learning_rate": 3.128029969149405e-05,
"loss": 0.0868,
"step": 2210
},
{
"epoch": 1.42,
"grad_norm": 3.6818881034851074,
"learning_rate": 2.9407227853680037e-05,
"loss": 0.0754,
"step": 2380
},
{
"epoch": 1.52,
"grad_norm": 0.5994019508361816,
"learning_rate": 2.753415601586602e-05,
"loss": 0.0736,
"step": 2550
},
{
"epoch": 1.62,
"grad_norm": 0.05288717895746231,
"learning_rate": 2.5661084178052008e-05,
"loss": 0.0563,
"step": 2720
},
{
"epoch": 1.72,
"grad_norm": 0.6832149028778076,
"learning_rate": 2.3788012340237993e-05,
"loss": 0.0681,
"step": 2890
},
{
"epoch": 1.82,
"grad_norm": 0.5376071929931641,
"learning_rate": 2.1914940502423974e-05,
"loss": 0.0457,
"step": 3060
},
{
"epoch": 1.92,
"grad_norm": 1.6747487783432007,
"learning_rate": 2.004186866460996e-05,
"loss": 0.0494,
"step": 3230
},
{
"epoch": 2.0,
"eval_gen_len": 4.5459,
"eval_loss": 0.038406919687986374,
"eval_rouge1": 97.2352,
"eval_rouge2": 43.4026,
"eval_rougeL": 97.2572,
"eval_rougeLsum": 97.2499,
"eval_runtime": 31.2975,
"eval_samples_per_second": 218.42,
"eval_steps_per_second": 13.675,
"step": 3362
},
{
"epoch": 2.02,
"grad_norm": 0.16915227472782135,
"learning_rate": 1.8168796826795948e-05,
"loss": 0.0515,
"step": 3400
},
{
"epoch": 2.12,
"grad_norm": 5.823632717132568,
"learning_rate": 1.629572498898193e-05,
"loss": 0.0487,
"step": 3570
},
{
"epoch": 2.22,
"grad_norm": 4.443608283996582,
"learning_rate": 1.4422653151167917e-05,
"loss": 0.0459,
"step": 3740
},
{
"epoch": 2.33,
"grad_norm": 2.1082069873809814,
"learning_rate": 1.25495813133539e-05,
"loss": 0.0416,
"step": 3910
},
{
"epoch": 2.43,
"grad_norm": 3.4776525497436523,
"learning_rate": 1.0676509475539887e-05,
"loss": 0.0412,
"step": 4080
},
{
"epoch": 2.53,
"grad_norm": 0.5169752240180969,
"learning_rate": 8.803437637725872e-06,
"loss": 0.0393,
"step": 4250
},
{
"epoch": 2.63,
"grad_norm": 7.877155780792236,
"learning_rate": 6.930365799911856e-06,
"loss": 0.0425,
"step": 4420
},
{
"epoch": 2.73,
"grad_norm": 0.5286906957626343,
"learning_rate": 5.0572939620978405e-06,
"loss": 0.0358,
"step": 4590
},
{
"epoch": 2.83,
"grad_norm": 1.1692440509796143,
"learning_rate": 3.1842221242838256e-06,
"loss": 0.0434,
"step": 4760
},
{
"epoch": 2.93,
"grad_norm": 1.6163593530654907,
"learning_rate": 1.3111502864698105e-06,
"loss": 0.0312,
"step": 4930
},
{
"epoch": 3.0,
"eval_gen_len": 4.5563,
"eval_loss": 0.03447870910167694,
"eval_rouge1": 97.3376,
"eval_rouge2": 43.622,
"eval_rougeL": 97.3669,
"eval_rougeLsum": 97.3596,
"eval_runtime": 31.306,
"eval_samples_per_second": 218.361,
"eval_steps_per_second": 13.672,
"step": 5043
}
],
"logging_steps": 170,
"max_steps": 5043,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1364346865188864.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}