sponsorblock-base-v1 / trainer_state.json
Joshua Lochner
Add model files (2.43m) - finetuned from https://huggingface.co/EColi/sponsorblock-base-v1
d1c8305
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5748183573990618,
"global_step": 200000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.992814770532512e-05,
"loss": 0.0668,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 4.985629541065024e-05,
"loss": 0.0693,
"step": 10000
},
{
"epoch": 0.03,
"eval_loss": 0.0688803568482399,
"eval_runtime": 645.0212,
"eval_samples_per_second": 29.968,
"eval_steps_per_second": 29.968,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 4.978444311597535e-05,
"loss": 0.0634,
"step": 15000
},
{
"epoch": 0.06,
"learning_rate": 4.971259082130047e-05,
"loss": 0.0637,
"step": 20000
},
{
"epoch": 0.06,
"eval_loss": 0.05873561650514603,
"eval_runtime": 644.2466,
"eval_samples_per_second": 30.004,
"eval_steps_per_second": 30.004,
"step": 20000
},
{
"epoch": 0.07,
"learning_rate": 4.964073852662559e-05,
"loss": 0.0605,
"step": 25000
},
{
"epoch": 0.09,
"learning_rate": 4.9568886231950703e-05,
"loss": 0.0633,
"step": 30000
},
{
"epoch": 0.09,
"eval_loss": 0.05583559721708298,
"eval_runtime": 644.4128,
"eval_samples_per_second": 29.996,
"eval_steps_per_second": 29.996,
"step": 30000
},
{
"epoch": 0.1,
"learning_rate": 4.9497033937275825e-05,
"loss": 0.0635,
"step": 35000
},
{
"epoch": 0.11,
"learning_rate": 4.9425181642600946e-05,
"loss": 0.0584,
"step": 40000
},
{
"epoch": 0.11,
"eval_loss": 0.06174061447381973,
"eval_runtime": 644.7843,
"eval_samples_per_second": 29.979,
"eval_steps_per_second": 29.979,
"step": 40000
},
{
"epoch": 0.13,
"learning_rate": 4.935332934792606e-05,
"loss": 0.0592,
"step": 45000
},
{
"epoch": 0.14,
"learning_rate": 4.9281477053251175e-05,
"loss": 0.061,
"step": 50000
},
{
"epoch": 0.14,
"eval_loss": 0.05651352182030678,
"eval_runtime": 642.7605,
"eval_samples_per_second": 30.073,
"eval_steps_per_second": 30.073,
"step": 50000
},
{
"epoch": 0.16,
"learning_rate": 4.92096247585763e-05,
"loss": 0.0693,
"step": 55000
},
{
"epoch": 0.17,
"learning_rate": 4.913777246390141e-05,
"loss": 0.0652,
"step": 60000
},
{
"epoch": 0.17,
"eval_loss": 0.055016856640577316,
"eval_runtime": 641.3853,
"eval_samples_per_second": 30.138,
"eval_steps_per_second": 30.138,
"step": 60000
},
{
"epoch": 0.19,
"learning_rate": 4.9065920169226526e-05,
"loss": 0.0665,
"step": 65000
},
{
"epoch": 0.2,
"learning_rate": 4.899406787455164e-05,
"loss": 0.0646,
"step": 70000
},
{
"epoch": 0.2,
"eval_loss": 0.05567777901887894,
"eval_runtime": 642.5363,
"eval_samples_per_second": 30.084,
"eval_steps_per_second": 30.084,
"step": 70000
},
{
"epoch": 0.22,
"learning_rate": 4.892221557987676e-05,
"loss": 0.0617,
"step": 75000
},
{
"epoch": 0.23,
"learning_rate": 4.8850363285201876e-05,
"loss": 0.0648,
"step": 80000
},
{
"epoch": 0.23,
"eval_loss": 0.05431482940912247,
"eval_runtime": 641.7629,
"eval_samples_per_second": 30.12,
"eval_steps_per_second": 30.12,
"step": 80000
},
{
"epoch": 0.24,
"learning_rate": 4.877851099052699e-05,
"loss": 0.0654,
"step": 85000
},
{
"epoch": 0.26,
"learning_rate": 4.870665869585211e-05,
"loss": 0.0664,
"step": 90000
},
{
"epoch": 0.26,
"eval_loss": 0.05483051761984825,
"eval_runtime": 643.1769,
"eval_samples_per_second": 30.054,
"eval_steps_per_second": 30.054,
"step": 90000
},
{
"epoch": 0.27,
"learning_rate": 4.863480640117723e-05,
"loss": 0.0649,
"step": 95000
},
{
"epoch": 0.29,
"learning_rate": 4.856295410650235e-05,
"loss": 0.0615,
"step": 100000
},
{
"epoch": 0.29,
"eval_loss": 0.061954181641340256,
"eval_runtime": 641.5389,
"eval_samples_per_second": 30.131,
"eval_steps_per_second": 30.131,
"step": 100000
},
{
"epoch": 0.3,
"learning_rate": 4.849110181182747e-05,
"loss": 0.0596,
"step": 105000
},
{
"epoch": 0.32,
"learning_rate": 4.8419249517152584e-05,
"loss": 0.0629,
"step": 110000
},
{
"epoch": 0.32,
"eval_loss": 0.06062796711921692,
"eval_runtime": 642.0211,
"eval_samples_per_second": 30.108,
"eval_steps_per_second": 30.108,
"step": 110000
},
{
"epoch": 0.33,
"learning_rate": 4.83473972224777e-05,
"loss": 0.0613,
"step": 115000
},
{
"epoch": 0.34,
"learning_rate": 4.827554492780282e-05,
"loss": 0.0683,
"step": 120000
},
{
"epoch": 0.34,
"eval_loss": 0.055720094591379166,
"eval_runtime": 642.5633,
"eval_samples_per_second": 30.083,
"eval_steps_per_second": 30.083,
"step": 120000
},
{
"epoch": 0.36,
"learning_rate": 4.8203692633127935e-05,
"loss": 0.0631,
"step": 125000
},
{
"epoch": 0.37,
"learning_rate": 4.813184033845305e-05,
"loss": 0.0649,
"step": 130000
},
{
"epoch": 0.37,
"eval_loss": 0.06274710595607758,
"eval_runtime": 642.0067,
"eval_samples_per_second": 30.109,
"eval_steps_per_second": 30.109,
"step": 130000
},
{
"epoch": 0.39,
"learning_rate": 4.805998804377817e-05,
"loss": 0.0642,
"step": 135000
},
{
"epoch": 0.4,
"learning_rate": 4.7988135749103285e-05,
"loss": 0.064,
"step": 140000
},
{
"epoch": 0.4,
"eval_loss": 0.05436055734753609,
"eval_runtime": 642.9629,
"eval_samples_per_second": 30.064,
"eval_steps_per_second": 30.064,
"step": 140000
},
{
"epoch": 0.42,
"learning_rate": 4.79162834544284e-05,
"loss": 0.0592,
"step": 145000
},
{
"epoch": 0.43,
"learning_rate": 4.784443115975352e-05,
"loss": 0.0574,
"step": 150000
},
{
"epoch": 0.43,
"eval_loss": 0.059992264956235886,
"eval_runtime": 642.342,
"eval_samples_per_second": 30.093,
"eval_steps_per_second": 30.093,
"step": 150000
},
{
"epoch": 0.45,
"learning_rate": 4.7772578865078636e-05,
"loss": 0.0637,
"step": 155000
},
{
"epoch": 0.46,
"learning_rate": 4.770072657040375e-05,
"loss": 0.0599,
"step": 160000
},
{
"epoch": 0.46,
"eval_loss": 0.05961042642593384,
"eval_runtime": 642.0137,
"eval_samples_per_second": 30.108,
"eval_steps_per_second": 30.108,
"step": 160000
},
{
"epoch": 0.47,
"learning_rate": 4.762887427572887e-05,
"loss": 0.0618,
"step": 165000
},
{
"epoch": 0.5,
"learning_rate": 4.748516968637911e-05,
"loss": 0.0619,
"step": 175000
},
{
"epoch": 0.57,
"learning_rate": 4.7125908213004694e-05,
"loss": 0.0558,
"step": 200000
},
{
"epoch": 0.57,
"eval_loss": 0.061555784195661545,
"eval_runtime": 654.4106,
"eval_samples_per_second": 29.538,
"eval_steps_per_second": 29.538,
"step": 200000
}
],
"max_steps": 3479360,
"num_train_epochs": 10,
"total_flos": 6.998757674752512e+16,
"trial_name": null,
"trial_params": null
}