t5-small-e2e-qa-full / trainer_state.json
longcld's picture
loss 0.6
03f4e3d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21722012544462244,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 0.0006992397089171283,
"loss": 0.5664,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 0.0006984794178342565,
"loss": 0.5958,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 0.0006977191267513849,
"loss": 0.599,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 0.0006969588356685131,
"loss": 0.6111,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 0.0006961985445856413,
"loss": 0.5763,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 0.0006954382535027696,
"loss": 0.6012,
"step": 600
},
{
"epoch": 0.08,
"learning_rate": 0.0006946779624198979,
"loss": 0.6054,
"step": 700
},
{
"epoch": 0.09,
"learning_rate": 0.0006939176713370262,
"loss": 0.5922,
"step": 800
},
{
"epoch": 0.1,
"learning_rate": 0.0006931573802541545,
"loss": 0.5865,
"step": 900
},
{
"epoch": 0.11,
"learning_rate": 0.0006923970891712827,
"loss": 0.6263,
"step": 1000
},
{
"epoch": 0.12,
"learning_rate": 0.0006916367980884109,
"loss": 0.6332,
"step": 1100
},
{
"epoch": 0.13,
"learning_rate": 0.0006908765070055393,
"loss": 0.618,
"step": 1200
},
{
"epoch": 0.14,
"learning_rate": 0.0006901162159226675,
"loss": 0.6272,
"step": 1300
},
{
"epoch": 0.15,
"learning_rate": 0.0006893559248397958,
"loss": 0.6505,
"step": 1400
},
{
"epoch": 0.16,
"learning_rate": 0.0006885956337569241,
"loss": 0.6179,
"step": 1500
},
{
"epoch": 0.17,
"learning_rate": 0.0006878353426740524,
"loss": 0.6509,
"step": 1600
},
{
"epoch": 0.18,
"learning_rate": 0.0006870750515911806,
"loss": 0.6487,
"step": 1700
},
{
"epoch": 0.2,
"learning_rate": 0.0006863147605083089,
"loss": 0.6341,
"step": 1800
},
{
"epoch": 0.21,
"learning_rate": 0.0006855544694254371,
"loss": 0.6408,
"step": 1900
},
{
"epoch": 0.22,
"learning_rate": 0.0006847941783425654,
"loss": 0.6413,
"step": 2000
}
],
"max_steps": 92070,
"num_train_epochs": 10,
"total_flos": 4097763812966400.0,
"trial_name": null,
"trial_params": null
}