sponsorblock-base-v1.1 / trainer_state.json
Joshua Lochner
Next training iteration (290k)
1850c9d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.31412202892914304,
"global_step": 105000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.925209040731157e-05,
"loss": 0.0716,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 4.850418081462313e-05,
"loss": 0.0741,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 4.77562712219347e-05,
"loss": 0.0753,
"step": 15000
},
{
"epoch": 0.06,
"learning_rate": 4.7008361629246264e-05,
"loss": 0.0709,
"step": 20000
},
{
"epoch": 0.07,
"learning_rate": 4.6260452036557824e-05,
"loss": 0.0754,
"step": 25000
},
{
"epoch": 0.07,
"eval_loss": 0.07123171538114548,
"eval_runtime": 1351.5318,
"eval_samples_per_second": 13.741,
"eval_steps_per_second": 13.741,
"step": 25000
},
{
"epoch": 0.09,
"learning_rate": 4.551254244386939e-05,
"loss": 0.0745,
"step": 30000
},
{
"epoch": 0.1,
"learning_rate": 4.476463285118095e-05,
"loss": 0.0671,
"step": 35000
},
{
"epoch": 0.12,
"learning_rate": 4.401672325849251e-05,
"loss": 0.0687,
"step": 40000
},
{
"epoch": 0.13,
"learning_rate": 4.326881366580408e-05,
"loss": 0.0718,
"step": 45000
},
{
"epoch": 0.15,
"learning_rate": 4.252090407311564e-05,
"loss": 0.0692,
"step": 50000
},
{
"epoch": 0.15,
"eval_loss": 0.06770400702953339,
"eval_runtime": 1359.9214,
"eval_samples_per_second": 13.656,
"eval_steps_per_second": 13.656,
"step": 50000
},
{
"epoch": 0.16,
"learning_rate": 4.1772994480427206e-05,
"loss": 0.0695,
"step": 55000
},
{
"epoch": 0.18,
"learning_rate": 4.102508488773877e-05,
"loss": 0.0696,
"step": 60000
},
{
"epoch": 0.19,
"learning_rate": 4.0277175295050333e-05,
"loss": 0.0689,
"step": 65000
},
{
"epoch": 0.21,
"learning_rate": 3.95292657023619e-05,
"loss": 0.0679,
"step": 70000
},
{
"epoch": 0.22,
"learning_rate": 3.878135610967347e-05,
"loss": 0.071,
"step": 75000
},
{
"epoch": 0.22,
"eval_loss": 0.05809599161148071,
"eval_runtime": 2705.4449,
"eval_samples_per_second": 6.864,
"eval_steps_per_second": 6.864,
"step": 75000
},
{
"epoch": 0.24,
"learning_rate": 3.803344651698503e-05,
"loss": 0.0712,
"step": 80000
},
{
"epoch": 0.25,
"learning_rate": 3.7285536924296595e-05,
"loss": 0.0683,
"step": 85000
},
{
"epoch": 0.27,
"learning_rate": 3.653762733160816e-05,
"loss": 0.0673,
"step": 90000
},
{
"epoch": 0.28,
"learning_rate": 3.578971773891972e-05,
"loss": 0.0685,
"step": 95000
},
{
"epoch": 0.3,
"learning_rate": 3.504180814623129e-05,
"loss": 0.0676,
"step": 100000
},
{
"epoch": 0.3,
"eval_loss": 0.06122186779975891,
"eval_runtime": 2772.9648,
"eval_samples_per_second": 6.697,
"eval_steps_per_second": 6.697,
"step": 100000
},
{
"epoch": 0.31,
"learning_rate": 3.429389855354285e-05,
"loss": 0.0672,
"step": 105000
}
],
"max_steps": 334265,
"num_train_epochs": 1,
"total_flos": 6.792868897849498e+16,
"trial_name": null,
"trial_params": null
}