deberta-v3-base-orgs-v2 / trainer_state.json
nbroad's picture
nbroad HF staff
Model save
ebbba25
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 600,
"global_step": 2565,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 9.820662768031189e-05,
"loss": 0.2265,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 9.625730994152047e-05,
"loss": 0.0839,
"step": 100
},
{
"epoch": 0.18,
"learning_rate": 9.430799220272904e-05,
"loss": 0.0794,
"step": 150
},
{
"epoch": 0.23,
"learning_rate": 9.235867446393762e-05,
"loss": 0.0751,
"step": 200
},
{
"epoch": 0.29,
"learning_rate": 9.04093567251462e-05,
"loss": 0.0733,
"step": 250
},
{
"epoch": 0.35,
"learning_rate": 8.846003898635477e-05,
"loss": 0.0706,
"step": 300
},
{
"epoch": 0.41,
"learning_rate": 8.651072124756335e-05,
"loss": 0.0695,
"step": 350
},
{
"epoch": 0.47,
"learning_rate": 8.456140350877193e-05,
"loss": 0.071,
"step": 400
},
{
"epoch": 0.53,
"learning_rate": 8.26120857699805e-05,
"loss": 0.0691,
"step": 450
},
{
"epoch": 0.58,
"learning_rate": 8.066276803118908e-05,
"loss": 0.0668,
"step": 500
},
{
"epoch": 0.64,
"learning_rate": 7.871345029239767e-05,
"loss": 0.0659,
"step": 550
},
{
"epoch": 0.7,
"learning_rate": 7.676413255360623e-05,
"loss": 0.0706,
"step": 600
},
{
"epoch": 0.7,
"eval_accuracy": 0.9601979961832061,
"eval_f1": 0.769017980636238,
"eval_loss": 0.11379604041576385,
"eval_precision": 0.7589703588143526,
"eval_recall": 0.7793352022426913,
"eval_runtime": 3.4889,
"eval_samples_per_second": 859.867,
"eval_steps_per_second": 3.439,
"step": 600
},
{
"epoch": 0.76,
"learning_rate": 7.481481481481481e-05,
"loss": 0.0689,
"step": 650
},
{
"epoch": 0.82,
"learning_rate": 7.28654970760234e-05,
"loss": 0.0658,
"step": 700
},
{
"epoch": 0.88,
"learning_rate": 7.091617933723196e-05,
"loss": 0.0645,
"step": 750
},
{
"epoch": 0.94,
"learning_rate": 6.896686159844055e-05,
"loss": 0.0663,
"step": 800
},
{
"epoch": 0.99,
"learning_rate": 6.701754385964913e-05,
"loss": 0.063,
"step": 850
},
{
"epoch": 1.05,
"learning_rate": 6.50682261208577e-05,
"loss": 0.0537,
"step": 900
},
{
"epoch": 1.11,
"learning_rate": 6.311890838206628e-05,
"loss": 0.055,
"step": 950
},
{
"epoch": 1.17,
"learning_rate": 6.116959064327486e-05,
"loss": 0.054,
"step": 1000
},
{
"epoch": 1.23,
"learning_rate": 5.9220272904483434e-05,
"loss": 0.0532,
"step": 1050
},
{
"epoch": 1.29,
"learning_rate": 5.727095516569201e-05,
"loss": 0.0534,
"step": 1100
},
{
"epoch": 1.35,
"learning_rate": 5.5321637426900584e-05,
"loss": 0.0516,
"step": 1150
},
{
"epoch": 1.4,
"learning_rate": 5.3372319688109166e-05,
"loss": 0.0526,
"step": 1200
},
{
"epoch": 1.4,
"eval_accuracy": 0.9617127862595419,
"eval_f1": 0.7870276823600727,
"eval_loss": 0.11129256337881088,
"eval_precision": 0.7942495921696574,
"eval_recall": 0.7799359231077293,
"eval_runtime": 3.4578,
"eval_samples_per_second": 867.603,
"eval_steps_per_second": 3.47,
"step": 1200
},
{
"epoch": 1.46,
"learning_rate": 5.142300194931774e-05,
"loss": 0.0512,
"step": 1250
},
{
"epoch": 1.52,
"learning_rate": 4.9473684210526315e-05,
"loss": 0.0515,
"step": 1300
},
{
"epoch": 1.58,
"learning_rate": 4.752436647173489e-05,
"loss": 0.048,
"step": 1350
},
{
"epoch": 1.64,
"learning_rate": 4.557504873294347e-05,
"loss": 0.0509,
"step": 1400
},
{
"epoch": 1.7,
"learning_rate": 4.362573099415205e-05,
"loss": 0.0495,
"step": 1450
},
{
"epoch": 1.75,
"learning_rate": 4.167641325536062e-05,
"loss": 0.0531,
"step": 1500
},
{
"epoch": 1.81,
"learning_rate": 3.97270955165692e-05,
"loss": 0.0524,
"step": 1550
},
{
"epoch": 1.87,
"learning_rate": 3.777777777777778e-05,
"loss": 0.0496,
"step": 1600
},
{
"epoch": 1.93,
"learning_rate": 3.582846003898635e-05,
"loss": 0.0487,
"step": 1650
},
{
"epoch": 1.99,
"learning_rate": 3.3879142300194935e-05,
"loss": 0.0514,
"step": 1700
},
{
"epoch": 2.05,
"learning_rate": 3.192982456140351e-05,
"loss": 0.0406,
"step": 1750
},
{
"epoch": 2.11,
"learning_rate": 2.9980506822612088e-05,
"loss": 0.0409,
"step": 1800
},
{
"epoch": 2.11,
"eval_accuracy": 0.9626550572519084,
"eval_f1": 0.7874886855073921,
"eval_loss": 0.1125219315290451,
"eval_precision": 0.7910689028086482,
"eval_recall": 0.7839407288746496,
"eval_runtime": 3.4674,
"eval_samples_per_second": 865.198,
"eval_steps_per_second": 3.461,
"step": 1800
},
{
"epoch": 2.16,
"learning_rate": 2.8031189083820663e-05,
"loss": 0.0386,
"step": 1850
},
{
"epoch": 2.22,
"learning_rate": 2.608187134502924e-05,
"loss": 0.0387,
"step": 1900
},
{
"epoch": 2.28,
"learning_rate": 2.413255360623782e-05,
"loss": 0.0399,
"step": 1950
},
{
"epoch": 2.34,
"learning_rate": 2.2183235867446394e-05,
"loss": 0.0379,
"step": 2000
},
{
"epoch": 2.4,
"learning_rate": 2.0233918128654973e-05,
"loss": 0.038,
"step": 2050
},
{
"epoch": 2.46,
"learning_rate": 1.8284600389863547e-05,
"loss": 0.04,
"step": 2100
},
{
"epoch": 2.51,
"learning_rate": 1.6335282651072126e-05,
"loss": 0.0382,
"step": 2150
},
{
"epoch": 2.57,
"learning_rate": 1.4385964912280702e-05,
"loss": 0.0384,
"step": 2200
},
{
"epoch": 2.63,
"learning_rate": 1.243664717348928e-05,
"loss": 0.038,
"step": 2250
},
{
"epoch": 2.69,
"learning_rate": 1.0487329434697857e-05,
"loss": 0.0399,
"step": 2300
},
{
"epoch": 2.75,
"learning_rate": 8.538011695906434e-06,
"loss": 0.0379,
"step": 2350
},
{
"epoch": 2.81,
"learning_rate": 6.588693957115009e-06,
"loss": 0.0376,
"step": 2400
},
{
"epoch": 2.81,
"eval_accuracy": 0.9631560114503817,
"eval_f1": 0.7926541499948702,
"eval_loss": 0.11860690265893936,
"eval_precision": 0.8127498422049232,
"eval_recall": 0.7735282338806568,
"eval_runtime": 3.4445,
"eval_samples_per_second": 870.961,
"eval_steps_per_second": 3.484,
"step": 2400
},
{
"epoch": 2.87,
"learning_rate": 4.639376218323587e-06,
"loss": 0.036,
"step": 2450
},
{
"epoch": 2.92,
"learning_rate": 2.690058479532164e-06,
"loss": 0.0366,
"step": 2500
},
{
"epoch": 2.98,
"learning_rate": 7.407407407407408e-07,
"loss": 0.0375,
"step": 2550
},
{
"epoch": 3.0,
"step": 2565,
"total_flos": 1.7165637031885056e+16,
"train_loss": 0.056466960860507065,
"train_runtime": 473.6102,
"train_samples_per_second": 693.127,
"train_steps_per_second": 5.416
}
],
"logging_steps": 50,
"max_steps": 2565,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.7165637031885056e+16,
"trial_name": null,
"trial_params": null
}