umsuka-en-zu / trainer_state.json
MUNasir's picture
Upload trainer_state.json
bb6f0e5
raw
history blame
8.09 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.3929173693086,
"global_step": 11500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.84,
"learning_rate": 9.578414839797639e-06,
"loss": 0.3136,
"step": 500
},
{
"epoch": 1.0,
"eval_bleu": 17.1673,
"eval_gen_len": 43.5832,
"eval_loss": 2.7004430294036865,
"eval_runtime": 263.1603,
"eval_samples_per_second": 3.792,
"eval_steps_per_second": 0.475,
"step": 593
},
{
"epoch": 1.69,
"learning_rate": 9.156829679595279e-06,
"loss": 0.2962,
"step": 1000
},
{
"epoch": 2.0,
"eval_bleu": 17.2012,
"eval_gen_len": 43.6693,
"eval_loss": 2.719139814376831,
"eval_runtime": 263.247,
"eval_samples_per_second": 3.791,
"eval_steps_per_second": 0.475,
"step": 1186
},
{
"epoch": 2.53,
"learning_rate": 8.735244519392918e-06,
"loss": 0.2927,
"step": 1500
},
{
"epoch": 3.0,
"eval_bleu": 17.2291,
"eval_gen_len": 43.482,
"eval_loss": 2.7412936687469482,
"eval_runtime": 260.7955,
"eval_samples_per_second": 3.827,
"eval_steps_per_second": 0.479,
"step": 1779
},
{
"epoch": 3.37,
"learning_rate": 8.313659359190556e-06,
"loss": 0.2677,
"step": 2000
},
{
"epoch": 4.0,
"eval_bleu": 17.135,
"eval_gen_len": 43.5862,
"eval_loss": 2.7617862224578857,
"eval_runtime": 261.1749,
"eval_samples_per_second": 3.821,
"eval_steps_per_second": 0.479,
"step": 2372
},
{
"epoch": 4.22,
"learning_rate": 7.892074198988196e-06,
"loss": 0.2591,
"step": 2500
},
{
"epoch": 5.0,
"eval_bleu": 17.5543,
"eval_gen_len": 43.5922,
"eval_loss": 2.7780115604400635,
"eval_runtime": 262.647,
"eval_samples_per_second": 3.8,
"eval_steps_per_second": 0.476,
"step": 2965
},
{
"epoch": 5.06,
"learning_rate": 7.470489038785835e-06,
"loss": 0.2473,
"step": 3000
},
{
"epoch": 5.9,
"learning_rate": 7.048903878583474e-06,
"loss": 0.2282,
"step": 3500
},
{
"epoch": 6.0,
"eval_bleu": 17.226,
"eval_gen_len": 43.6703,
"eval_loss": 2.794311761856079,
"eval_runtime": 263.8826,
"eval_samples_per_second": 3.782,
"eval_steps_per_second": 0.474,
"step": 3558
},
{
"epoch": 6.75,
"learning_rate": 6.6273187183811136e-06,
"loss": 0.2244,
"step": 4000
},
{
"epoch": 7.0,
"eval_bleu": 17.615,
"eval_gen_len": 43.6934,
"eval_loss": 2.808680295944214,
"eval_runtime": 264.0527,
"eval_samples_per_second": 3.78,
"eval_steps_per_second": 0.473,
"step": 4151
},
{
"epoch": 7.59,
"learning_rate": 6.2057335581787524e-06,
"loss": 0.2196,
"step": 4500
},
{
"epoch": 8.0,
"eval_bleu": 17.3227,
"eval_gen_len": 43.7715,
"eval_loss": 2.825133800506592,
"eval_runtime": 281.974,
"eval_samples_per_second": 3.539,
"eval_steps_per_second": 0.443,
"step": 4744
},
{
"epoch": 8.43,
"learning_rate": 5.784148397976391e-06,
"loss": 0.2101,
"step": 5000
},
{
"epoch": 9.0,
"eval_bleu": 17.5072,
"eval_gen_len": 43.7084,
"eval_loss": 2.834676742553711,
"eval_runtime": 263.2231,
"eval_samples_per_second": 3.791,
"eval_steps_per_second": 0.475,
"step": 5337
},
{
"epoch": 9.27,
"learning_rate": 5.362563237774031e-06,
"loss": 0.2077,
"step": 5500
},
{
"epoch": 10.0,
"eval_bleu": 17.5712,
"eval_gen_len": 43.8597,
"eval_loss": 2.842376708984375,
"eval_runtime": 270.4686,
"eval_samples_per_second": 3.69,
"eval_steps_per_second": 0.462,
"step": 5930
},
{
"epoch": 10.12,
"learning_rate": 4.94097807757167e-06,
"loss": 0.2034,
"step": 6000
},
{
"epoch": 10.96,
"learning_rate": 4.519392917369309e-06,
"loss": 0.1968,
"step": 6500
},
{
"epoch": 11.0,
"eval_bleu": 17.6007,
"eval_gen_len": 43.6994,
"eval_loss": 2.851884365081787,
"eval_runtime": 261.9767,
"eval_samples_per_second": 3.809,
"eval_steps_per_second": 0.477,
"step": 6523
},
{
"epoch": 11.8,
"learning_rate": 4.097807757166948e-06,
"loss": 0.1902,
"step": 7000
},
{
"epoch": 12.0,
"eval_bleu": 17.6333,
"eval_gen_len": 43.6924,
"eval_loss": 2.8614132404327393,
"eval_runtime": 263.2972,
"eval_samples_per_second": 3.79,
"eval_steps_per_second": 0.475,
"step": 7116
},
{
"epoch": 12.65,
"learning_rate": 3.676222596964587e-06,
"loss": 0.198,
"step": 7500
},
{
"epoch": 13.0,
"eval_bleu": 17.6153,
"eval_gen_len": 43.7034,
"eval_loss": 2.865877866744995,
"eval_runtime": 261.219,
"eval_samples_per_second": 3.821,
"eval_steps_per_second": 0.479,
"step": 7709
},
{
"epoch": 13.49,
"learning_rate": 3.2546374367622263e-06,
"loss": 0.1861,
"step": 8000
},
{
"epoch": 14.0,
"eval_bleu": 17.5959,
"eval_gen_len": 43.7154,
"eval_loss": 2.873347043991089,
"eval_runtime": 260.1505,
"eval_samples_per_second": 3.836,
"eval_steps_per_second": 0.48,
"step": 8302
},
{
"epoch": 14.33,
"learning_rate": 2.8330522765598656e-06,
"loss": 0.1956,
"step": 8500
},
{
"epoch": 15.0,
"eval_bleu": 17.6169,
"eval_gen_len": 43.7164,
"eval_loss": 2.876323938369751,
"eval_runtime": 261.1714,
"eval_samples_per_second": 3.821,
"eval_steps_per_second": 0.479,
"step": 8895
},
{
"epoch": 15.18,
"learning_rate": 2.4114671163575045e-06,
"loss": 0.1924,
"step": 9000
},
{
"epoch": 16.0,
"eval_bleu": 17.5443,
"eval_gen_len": 43.7194,
"eval_loss": 2.880269765853882,
"eval_runtime": 261.8101,
"eval_samples_per_second": 3.812,
"eval_steps_per_second": 0.477,
"step": 9488
},
{
"epoch": 16.02,
"learning_rate": 1.9898819561551434e-06,
"loss": 0.1946,
"step": 9500
},
{
"epoch": 16.86,
"learning_rate": 1.5682967959527825e-06,
"loss": 0.1946,
"step": 10000
},
{
"epoch": 17.0,
"eval_bleu": 17.577,
"eval_gen_len": 43.6142,
"eval_loss": 2.8834807872772217,
"eval_runtime": 259.2401,
"eval_samples_per_second": 3.85,
"eval_steps_per_second": 0.482,
"step": 10081
},
{
"epoch": 17.71,
"learning_rate": 1.1467116357504218e-06,
"loss": 0.1987,
"step": 10500
},
{
"epoch": 18.0,
"eval_bleu": 17.5677,
"eval_gen_len": 43.6623,
"eval_loss": 2.8818464279174805,
"eval_runtime": 269.8962,
"eval_samples_per_second": 3.698,
"eval_steps_per_second": 0.463,
"step": 10674
},
{
"epoch": 18.55,
"learning_rate": 7.251264755480608e-07,
"loss": 0.2011,
"step": 11000
},
{
"epoch": 19.0,
"eval_bleu": 17.6118,
"eval_gen_len": 43.7395,
"eval_loss": 2.882765531539917,
"eval_runtime": 265.3175,
"eval_samples_per_second": 3.762,
"eval_steps_per_second": 0.471,
"step": 11267
},
{
"epoch": 19.39,
"learning_rate": 3.0354131534569986e-07,
"loss": 0.2049,
"step": 11500
}
],
"max_steps": 11860,
"num_train_epochs": 20,
"total_flos": 1623592378957824.0,
"trial_name": null,
"trial_params": null
}