t5-deshuffle / trainer_state.json
marksverdhei's picture
Fix bug and re-train
7ca9fdf
{
"best_metric": 1.6302473545074463,
"best_model_checkpoint": "./results/t5-bow-decoder-base\\checkpoint-6750",
"epoch": 3.0,
"global_step": 6750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11,
"learning_rate": 0.0005,
"loss": 3.0243,
"step": 256
},
{
"epoch": 0.23,
"learning_rate": 0.001,
"loss": 1.9017,
"step": 512
},
{
"epoch": 0.34,
"learning_rate": 0.0009942456392735119,
"loss": 1.8801,
"step": 768
},
{
"epoch": 0.46,
"learning_rate": 0.000988491278547024,
"loss": 1.875,
"step": 1024
},
{
"epoch": 0.57,
"learning_rate": 0.0009827369178205358,
"loss": 1.7517,
"step": 1280
},
{
"epoch": 0.68,
"learning_rate": 0.000976982557094048,
"loss": 1.8016,
"step": 1536
},
{
"epoch": 0.8,
"learning_rate": 0.0009712281963675598,
"loss": 1.7453,
"step": 1792
},
{
"epoch": 0.91,
"learning_rate": 0.0009654738356410718,
"loss": 1.7931,
"step": 2048
},
{
"epoch": 1.0,
"eval_loss": 1.6831984519958496,
"eval_runtime": 18.9287,
"eval_samples_per_second": 52.83,
"eval_steps_per_second": 6.604,
"step": 2250
},
{
"epoch": 1.02,
"learning_rate": 0.0009597194749145838,
"loss": 1.7622,
"step": 2304
},
{
"epoch": 1.14,
"learning_rate": 0.0009539651141880957,
"loss": 1.5755,
"step": 2560
},
{
"epoch": 1.25,
"learning_rate": 0.0009482107534616077,
"loss": 1.6284,
"step": 2816
},
{
"epoch": 1.37,
"learning_rate": 0.0009424563927351196,
"loss": 1.6671,
"step": 3072
},
{
"epoch": 1.48,
"learning_rate": 0.0009367020320086316,
"loss": 1.5683,
"step": 3328
},
{
"epoch": 1.59,
"learning_rate": 0.0009309476712821435,
"loss": 1.618,
"step": 3584
},
{
"epoch": 1.71,
"learning_rate": 0.0009251933105556555,
"loss": 1.6381,
"step": 3840
},
{
"epoch": 1.82,
"learning_rate": 0.0009194389498291674,
"loss": 1.6486,
"step": 4096
},
{
"epoch": 1.93,
"learning_rate": 0.0009136845891026794,
"loss": 1.6367,
"step": 4352
},
{
"epoch": 2.0,
"eval_loss": 1.6470595598220825,
"eval_runtime": 19.0654,
"eval_samples_per_second": 52.451,
"eval_steps_per_second": 6.556,
"step": 4500
},
{
"epoch": 2.05,
"learning_rate": 0.0009079302283761914,
"loss": 1.527,
"step": 4608
},
{
"epoch": 2.16,
"learning_rate": 0.0009021758676497032,
"loss": 1.4307,
"step": 4864
},
{
"epoch": 2.28,
"learning_rate": 0.0008964215069232152,
"loss": 1.4458,
"step": 5120
},
{
"epoch": 2.39,
"learning_rate": 0.0008906671461967272,
"loss": 1.4737,
"step": 5376
},
{
"epoch": 2.5,
"learning_rate": 0.0008849127854702393,
"loss": 1.4935,
"step": 5632
},
{
"epoch": 2.62,
"learning_rate": 0.0008791584247437511,
"loss": 1.4757,
"step": 5888
},
{
"epoch": 2.73,
"learning_rate": 0.0008734040640172631,
"loss": 1.4712,
"step": 6144
},
{
"epoch": 2.84,
"learning_rate": 0.0008676497032907751,
"loss": 1.4494,
"step": 6400
},
{
"epoch": 2.96,
"learning_rate": 0.000861895342564287,
"loss": 1.4335,
"step": 6656
},
{
"epoch": 3.0,
"eval_loss": 1.6302473545074463,
"eval_runtime": 19.2598,
"eval_samples_per_second": 51.922,
"eval_steps_per_second": 6.49,
"step": 6750
}
],
"max_steps": 45000,
"num_train_epochs": 20,
"total_flos": 1.644186304512e+16,
"trial_name": null,
"trial_params": null
}