ascolda's picture
Upload 13 files
1352967 verified
raw
history blame contribute delete
No virus
6.39 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 250,
"global_step": 3975,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"learning_rate": 4.685534591194969e-05,
"loss": 0.9547,
"step": 250
},
{
"epoch": 0.19,
"eval_bleu": 47.1486,
"eval_gen_len": 42.0851,
"eval_loss": 0.7848668098449707,
"eval_runtime": 216.7661,
"eval_samples_per_second": 10.195,
"eval_steps_per_second": 0.323,
"step": 250
},
{
"epoch": 0.38,
"learning_rate": 4.3710691823899376e-05,
"loss": 0.795,
"step": 500
},
{
"epoch": 0.38,
"eval_bleu": 49.685,
"eval_gen_len": 42.7009,
"eval_loss": 0.7136563062667847,
"eval_runtime": 223.9634,
"eval_samples_per_second": 9.868,
"eval_steps_per_second": 0.313,
"step": 500
},
{
"epoch": 0.57,
"learning_rate": 4.0566037735849064e-05,
"loss": 0.7304,
"step": 750
},
{
"epoch": 0.57,
"eval_bleu": 51.2955,
"eval_gen_len": 42.5633,
"eval_loss": 0.6703998446464539,
"eval_runtime": 219.0588,
"eval_samples_per_second": 10.089,
"eval_steps_per_second": 0.32,
"step": 750
},
{
"epoch": 0.75,
"learning_rate": 3.7421383647798744e-05,
"loss": 0.6963,
"step": 1000
},
{
"epoch": 0.75,
"eval_bleu": 52.4864,
"eval_gen_len": 42.395,
"eval_loss": 0.6411181688308716,
"eval_runtime": 230.5264,
"eval_samples_per_second": 9.587,
"eval_steps_per_second": 0.304,
"step": 1000
},
{
"epoch": 0.94,
"learning_rate": 3.4276729559748424e-05,
"loss": 0.6758,
"step": 1250
},
{
"epoch": 0.94,
"eval_bleu": 53.4802,
"eval_gen_len": 42.5258,
"eval_loss": 0.6214491128921509,
"eval_runtime": 221.0884,
"eval_samples_per_second": 9.996,
"eval_steps_per_second": 0.317,
"step": 1250
},
{
"epoch": 1.13,
"learning_rate": 3.113207547169811e-05,
"loss": 0.6113,
"step": 1500
},
{
"epoch": 1.13,
"eval_bleu": 53.3832,
"eval_gen_len": 42.0814,
"eval_loss": 0.6121014952659607,
"eval_runtime": 226.9957,
"eval_samples_per_second": 9.736,
"eval_steps_per_second": 0.308,
"step": 1500
},
{
"epoch": 1.32,
"learning_rate": 2.7987421383647798e-05,
"loss": 0.5779,
"step": 1750
},
{
"epoch": 1.32,
"eval_bleu": 54.2053,
"eval_gen_len": 42.462,
"eval_loss": 0.5953919291496277,
"eval_runtime": 231.1354,
"eval_samples_per_second": 9.561,
"eval_steps_per_second": 0.303,
"step": 1750
},
{
"epoch": 1.51,
"learning_rate": 2.4842767295597485e-05,
"loss": 0.5707,
"step": 2000
},
{
"epoch": 1.51,
"eval_bleu": 54.7121,
"eval_gen_len": 42.3439,
"eval_loss": 0.5879282355308533,
"eval_runtime": 222.7105,
"eval_samples_per_second": 9.923,
"eval_steps_per_second": 0.314,
"step": 2000
},
{
"epoch": 1.7,
"learning_rate": 2.1698113207547172e-05,
"loss": 0.5765,
"step": 2250
},
{
"epoch": 1.7,
"eval_bleu": 54.8816,
"eval_gen_len": 42.2819,
"eval_loss": 0.5788756608963013,
"eval_runtime": 229.4679,
"eval_samples_per_second": 9.631,
"eval_steps_per_second": 0.305,
"step": 2250
},
{
"epoch": 1.89,
"learning_rate": 1.8553459119496856e-05,
"loss": 0.5617,
"step": 2500
},
{
"epoch": 1.89,
"eval_bleu": 55.246,
"eval_gen_len": 42.1756,
"eval_loss": 0.5728903412818909,
"eval_runtime": 223.2414,
"eval_samples_per_second": 9.9,
"eval_steps_per_second": 0.314,
"step": 2500
},
{
"epoch": 2.08,
"learning_rate": 1.540880503144654e-05,
"loss": 0.5349,
"step": 2750
},
{
"epoch": 2.08,
"eval_bleu": 55.2927,
"eval_gen_len": 42.238,
"eval_loss": 0.5718762874603271,
"eval_runtime": 223.5775,
"eval_samples_per_second": 9.885,
"eval_steps_per_second": 0.313,
"step": 2750
},
{
"epoch": 2.26,
"learning_rate": 1.2264150943396227e-05,
"loss": 0.5171,
"step": 3000
},
{
"epoch": 2.26,
"eval_bleu": 55.3323,
"eval_gen_len": 42.291,
"eval_loss": 0.5671454071998596,
"eval_runtime": 227.9511,
"eval_samples_per_second": 9.695,
"eval_steps_per_second": 0.307,
"step": 3000
},
{
"epoch": 2.45,
"learning_rate": 9.119496855345912e-06,
"loss": 0.512,
"step": 3250
},
{
"epoch": 2.45,
"eval_bleu": 55.4991,
"eval_gen_len": 42.2593,
"eval_loss": 0.5644388794898987,
"eval_runtime": 232.3325,
"eval_samples_per_second": 9.512,
"eval_steps_per_second": 0.301,
"step": 3250
},
{
"epoch": 2.64,
"learning_rate": 5.974842767295598e-06,
"loss": 0.5129,
"step": 3500
},
{
"epoch": 2.64,
"eval_bleu": 55.4748,
"eval_gen_len": 42.1778,
"eval_loss": 0.5624849200248718,
"eval_runtime": 230.2857,
"eval_samples_per_second": 9.597,
"eval_steps_per_second": 0.304,
"step": 3500
},
{
"epoch": 2.83,
"learning_rate": 2.830188679245283e-06,
"loss": 0.5058,
"step": 3750
},
{
"epoch": 2.83,
"eval_bleu": 55.546,
"eval_gen_len": 42.2176,
"eval_loss": 0.560586154460907,
"eval_runtime": 231.1144,
"eval_samples_per_second": 9.562,
"eval_steps_per_second": 0.303,
"step": 3750
},
{
"epoch": 3.0,
"step": 3975,
"total_flos": 3.068708155215053e+16,
"train_loss": 0.6158648508899617,
"train_runtime": 15318.3168,
"train_samples_per_second": 8.298,
"train_steps_per_second": 0.259
}
],
"logging_steps": 250,
"max_steps": 3975,
"num_train_epochs": 3,
"save_steps": 10000,
"total_flos": 3.068708155215053e+16,
"trial_name": null,
"trial_params": null
}