sentiment-pt-pl20-0 / trainer_state.json
apwic's picture
End of training
07f7d02 verified
raw
history blame contribute delete
No virus
11 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.196406364440918,
"learning_rate": 4.75e-05,
"loss": 0.5417,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7543859649122807,
"eval_f1": 0.6730602006688964,
"eval_loss": 0.47316503524780273,
"eval_precision": 0.7027985359158151,
"eval_recall": 0.6612111292962357,
"eval_runtime": 1.7797,
"eval_samples_per_second": 224.194,
"eval_steps_per_second": 28.094,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.380220651626587,
"learning_rate": 4.5e-05,
"loss": 0.4395,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7919799498746867,
"eval_f1": 0.7705259948585406,
"eval_loss": 0.4128379225730896,
"eval_precision": 0.7612839958158996,
"eval_recall": 0.8028277868703401,
"eval_runtime": 1.7767,
"eval_samples_per_second": 224.574,
"eval_steps_per_second": 28.142,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 2.2583518028259277,
"learning_rate": 4.25e-05,
"loss": 0.3319,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8315033783783784,
"eval_loss": 0.32298392057418823,
"eval_precision": 0.8438775510204082,
"eval_recall": 0.8217403164211674,
"eval_runtime": 1.7811,
"eval_samples_per_second": 224.023,
"eval_steps_per_second": 28.073,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 0.8704787492752075,
"learning_rate": 4e-05,
"loss": 0.2873,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8237962290701417,
"eval_loss": 0.322201669216156,
"eval_precision": 0.8201159969225307,
"eval_recall": 0.8278777959629023,
"eval_runtime": 1.7743,
"eval_samples_per_second": 224.873,
"eval_steps_per_second": 28.18,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 0.8079779148101807,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2571,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8721804511278195,
"eval_f1": 0.8484099018899409,
"eval_loss": 0.29681602120399475,
"eval_precision": 0.8430645161290322,
"eval_recall": 0.8545644662665939,
"eval_runtime": 1.7796,
"eval_samples_per_second": 224.208,
"eval_steps_per_second": 28.096,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.8642373085021973,
"learning_rate": 3.5e-05,
"loss": 0.2443,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8466330637850383,
"eval_loss": 0.29177311062812805,
"eval_precision": 0.8353276671885485,
"eval_recall": 0.8635206401163849,
"eval_runtime": 1.7765,
"eval_samples_per_second": 224.594,
"eval_steps_per_second": 28.145,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.3272399306297302,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2256,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8447157518450185,
"eval_loss": 0.2981509566307068,
"eval_precision": 0.8325401217487549,
"eval_recall": 0.864248045099109,
"eval_runtime": 1.7799,
"eval_samples_per_second": 224.168,
"eval_steps_per_second": 28.091,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 2.9290239810943604,
"learning_rate": 3e-05,
"loss": 0.2172,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8646934961080748,
"eval_loss": 0.2722471356391907,
"eval_precision": 0.882551000198059,
"eval_recall": 0.8512456810329151,
"eval_runtime": 1.7818,
"eval_samples_per_second": 223.935,
"eval_steps_per_second": 28.062,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 6.677186012268066,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2049,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8748655913978494,
"eval_loss": 0.264840692281723,
"eval_precision": 0.86983032873807,
"eval_recall": 0.8805237315875614,
"eval_runtime": 1.7819,
"eval_samples_per_second": 223.922,
"eval_steps_per_second": 28.06,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 2.770735502243042,
"learning_rate": 2.5e-05,
"loss": 0.1914,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.9072681704260651,
"eval_f1": 0.8848664457009163,
"eval_loss": 0.2680298984050751,
"eval_precision": 0.8977236138837015,
"eval_recall": 0.8743862520458265,
"eval_runtime": 1.7909,
"eval_samples_per_second": 222.787,
"eval_steps_per_second": 27.918,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 0.3618270754814148,
"learning_rate": 2.25e-05,
"loss": 0.1724,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.8808243727598566,
"eval_loss": 0.264539897441864,
"eval_precision": 0.875706963591375,
"eval_recall": 0.8865702855064557,
"eval_runtime": 1.7802,
"eval_samples_per_second": 224.138,
"eval_steps_per_second": 28.087,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 2.054783582687378,
"learning_rate": 2e-05,
"loss": 0.1689,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.881931703852755,
"eval_loss": 0.27462852001190186,
"eval_precision": 0.8740012737378415,
"eval_recall": 0.8915711947626841,
"eval_runtime": 1.778,
"eval_samples_per_second": 224.405,
"eval_steps_per_second": 28.121,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 0.6106524467468262,
"learning_rate": 1.75e-05,
"loss": 0.1473,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8800854160075926,
"eval_loss": 0.28370755910873413,
"eval_precision": 0.9002425410326267,
"eval_recall": 0.8651118385160939,
"eval_runtime": 1.785,
"eval_samples_per_second": 223.535,
"eval_steps_per_second": 28.012,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 4.3967108726501465,
"learning_rate": 1.5e-05,
"loss": 0.1577,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.9022556390977443,
"eval_f1": 0.884617951284618,
"eval_loss": 0.2892190217971802,
"eval_precision": 0.8772893772893773,
"eval_recall": 0.8933442444080741,
"eval_runtime": 1.7818,
"eval_samples_per_second": 223.936,
"eval_steps_per_second": 28.062,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 0.48143357038497925,
"learning_rate": 1.25e-05,
"loss": 0.1468,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.9022556390977443,
"eval_f1": 0.8829621606985718,
"eval_loss": 0.27894169092178345,
"eval_precision": 0.8802419354838709,
"eval_recall": 0.8858428805237315,
"eval_runtime": 1.7862,
"eval_samples_per_second": 223.376,
"eval_steps_per_second": 27.992,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 0.6658376455307007,
"learning_rate": 1e-05,
"loss": 0.1473,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8775533117267087,
"eval_loss": 0.28521186113357544,
"eval_precision": 0.873246730188791,
"eval_recall": 0.8822967812329514,
"eval_runtime": 1.778,
"eval_samples_per_second": 224.41,
"eval_steps_per_second": 28.121,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 1.4181182384490967,
"learning_rate": 7.5e-06,
"loss": 0.1274,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8856836962422341,
"eval_loss": 0.28584179282188416,
"eval_precision": 0.8838235294117647,
"eval_recall": 0.8876159301691217,
"eval_runtime": 1.7792,
"eval_samples_per_second": 224.253,
"eval_steps_per_second": 28.102,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 5.91610050201416,
"learning_rate": 5e-06,
"loss": 0.1318,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.8802521008403361,
"eval_loss": 0.29269006848335266,
"eval_precision": 0.8767168083714847,
"eval_recall": 0.8840698308783415,
"eval_runtime": 1.7862,
"eval_samples_per_second": 223.383,
"eval_steps_per_second": 27.993,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 6.210901737213135,
"learning_rate": 2.5e-06,
"loss": 0.1355,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.9072681704260651,
"eval_f1": 0.8884169154604891,
"eval_loss": 0.2884277403354645,
"eval_precision": 0.8874630556728391,
"eval_recall": 0.8893889798145117,
"eval_runtime": 1.7806,
"eval_samples_per_second": 224.08,
"eval_steps_per_second": 28.08,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 1.5748217105865479,
"learning_rate": 0.0,
"loss": 0.1367,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.9072681704260651,
"eval_f1": 0.8884169154604891,
"eval_loss": 0.2881532311439514,
"eval_precision": 0.8874630556728391,
"eval_recall": 0.8893889798145117,
"eval_runtime": 1.7811,
"eval_samples_per_second": 224.016,
"eval_steps_per_second": 28.072,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 8444128359504000.0,
"train_loss": 0.2206378909408069,
"train_runtime": 624.8021,
"train_samples_per_second": 116.453,
"train_steps_per_second": 3.905
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8444128359504000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}