tibetan-bert_tncc-title_tsheg / trainer_state.json
metaphors's picture
Upload 7 files
5ba3b96 verified
{
"best_metric": 0.6057453163676313,
"best_model_checkpoint": "../saved_model/tibetan-bert_tncc-title_v3/checkpoint-4640",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 4640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5717367853290184,
"eval_loss": 1.3059929609298706,
"eval_macro-f1": 0.4599932337899495,
"eval_macro-precision": 0.5198165980322329,
"eval_macro-recall": 0.47524513594607626,
"eval_runtime": 7.3569,
"eval_samples_per_second": 126.004,
"eval_steps_per_second": 3.942,
"eval_weighted-f1": 0.5542294574300444,
"eval_weighted-precision": 0.591566389333376,
"eval_weighted-recall": 0.5717367853290184,
"step": 232
},
{
"epoch": 2.0,
"eval_accuracy": 0.6148867313915858,
"eval_loss": 1.2135106325149536,
"eval_macro-f1": 0.5584398853408242,
"eval_macro-precision": 0.5749611619851209,
"eval_macro-recall": 0.5695895916407867,
"eval_runtime": 7.3464,
"eval_samples_per_second": 126.185,
"eval_steps_per_second": 3.948,
"eval_weighted-f1": 0.608666635280638,
"eval_weighted-precision": 0.6187269920143599,
"eval_weighted-recall": 0.6148867313915858,
"step": 464
},
{
"epoch": 2.16,
"learning_rate": 4.461206896551724e-05,
"loss": 1.2716,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.6170442286947141,
"eval_loss": 1.2330070734024048,
"eval_macro-f1": 0.5516603706806994,
"eval_macro-precision": 0.575159910949547,
"eval_macro-recall": 0.5503575961333385,
"eval_runtime": 7.2744,
"eval_samples_per_second": 127.433,
"eval_steps_per_second": 3.987,
"eval_weighted-f1": 0.6100508919269417,
"eval_weighted-precision": 0.6247775875728347,
"eval_weighted-recall": 0.6170442286947141,
"step": 696
},
{
"epoch": 4.0,
"eval_accuracy": 0.6192017259978425,
"eval_loss": 1.3646526336669922,
"eval_macro-f1": 0.5686376693802021,
"eval_macro-precision": 0.5941264267230656,
"eval_macro-recall": 0.5623900177716177,
"eval_runtime": 7.3362,
"eval_samples_per_second": 126.359,
"eval_steps_per_second": 3.953,
"eval_weighted-f1": 0.6165925170578278,
"eval_weighted-precision": 0.6295963103897588,
"eval_weighted-recall": 0.6192017259978425,
"step": 928
},
{
"epoch": 4.31,
"learning_rate": 3.922413793103448e-05,
"loss": 0.5952,
"step": 1000
},
{
"epoch": 5.0,
"eval_accuracy": 0.6256742179072277,
"eval_loss": 1.435981035232544,
"eval_macro-f1": 0.5864237155073179,
"eval_macro-precision": 0.641734562145522,
"eval_macro-recall": 0.5708896457267656,
"eval_runtime": 7.3366,
"eval_samples_per_second": 126.353,
"eval_steps_per_second": 3.953,
"eval_weighted-f1": 0.6206257136654274,
"eval_weighted-precision": 0.6340957047257687,
"eval_weighted-recall": 0.6256742179072277,
"step": 1160
},
{
"epoch": 6.0,
"eval_accuracy": 0.6213592233009708,
"eval_loss": 1.639809489250183,
"eval_macro-f1": 0.5665517506857142,
"eval_macro-precision": 0.579234133027197,
"eval_macro-recall": 0.5645134686065383,
"eval_runtime": 7.3778,
"eval_samples_per_second": 125.647,
"eval_steps_per_second": 3.931,
"eval_weighted-f1": 0.61941625450029,
"eval_weighted-precision": 0.6256700095561613,
"eval_weighted-recall": 0.6213592233009708,
"step": 1392
},
{
"epoch": 6.47,
"learning_rate": 3.383620689655172e-05,
"loss": 0.259,
"step": 1500
},
{
"epoch": 7.0,
"eval_accuracy": 0.6127292340884574,
"eval_loss": 1.7420923709869385,
"eval_macro-f1": 0.5600620804632168,
"eval_macro-precision": 0.580446830474752,
"eval_macro-recall": 0.5638504244142538,
"eval_runtime": 7.3417,
"eval_samples_per_second": 126.265,
"eval_steps_per_second": 3.95,
"eval_weighted-f1": 0.609536126843797,
"eval_weighted-precision": 0.6248423019885316,
"eval_weighted-recall": 0.6127292340884574,
"step": 1624
},
{
"epoch": 8.0,
"eval_accuracy": 0.6138079827400216,
"eval_loss": 1.8710674047470093,
"eval_macro-f1": 0.5822983345441143,
"eval_macro-precision": 0.5948063750427987,
"eval_macro-recall": 0.5794954217168365,
"eval_runtime": 7.3262,
"eval_samples_per_second": 126.532,
"eval_steps_per_second": 3.958,
"eval_weighted-f1": 0.6125939186272255,
"eval_weighted-precision": 0.6180044167574804,
"eval_weighted-recall": 0.6138079827400216,
"step": 1856
},
{
"epoch": 8.62,
"learning_rate": 2.844827586206897e-05,
"loss": 0.1433,
"step": 2000
},
{
"epoch": 9.0,
"eval_accuracy": 0.6084142394822006,
"eval_loss": 1.9591827392578125,
"eval_macro-f1": 0.5632816067487086,
"eval_macro-precision": 0.5909968207291227,
"eval_macro-recall": 0.5509900757560654,
"eval_runtime": 7.3577,
"eval_samples_per_second": 125.991,
"eval_steps_per_second": 3.941,
"eval_weighted-f1": 0.6058400058395833,
"eval_weighted-precision": 0.615382962851156,
"eval_weighted-recall": 0.6084142394822006,
"step": 2088
},
{
"epoch": 10.0,
"eval_accuracy": 0.627831715210356,
"eval_loss": 1.9844281673431396,
"eval_macro-f1": 0.5720261323200381,
"eval_macro-precision": 0.6115468809046185,
"eval_macro-recall": 0.5521363552960614,
"eval_runtime": 7.2941,
"eval_samples_per_second": 127.089,
"eval_steps_per_second": 3.976,
"eval_weighted-f1": 0.6186046402157037,
"eval_weighted-precision": 0.6241715352593074,
"eval_weighted-recall": 0.627831715210356,
"step": 2320
},
{
"epoch": 10.78,
"learning_rate": 2.306034482758621e-05,
"loss": 0.0918,
"step": 2500
},
{
"epoch": 11.0,
"eval_accuracy": 0.622437971952535,
"eval_loss": 2.121650218963623,
"eval_macro-f1": 0.575328889968513,
"eval_macro-precision": 0.6046131450610978,
"eval_macro-recall": 0.5644507417595815,
"eval_runtime": 7.3602,
"eval_samples_per_second": 125.948,
"eval_steps_per_second": 3.94,
"eval_weighted-f1": 0.6210201377968305,
"eval_weighted-precision": 0.6318263677896466,
"eval_weighted-recall": 0.622437971952535,
"step": 2552
},
{
"epoch": 12.0,
"eval_accuracy": 0.6148867313915858,
"eval_loss": 2.1600427627563477,
"eval_macro-f1": 0.5634622559142987,
"eval_macro-precision": 0.5755339049051247,
"eval_macro-recall": 0.5624852754080202,
"eval_runtime": 7.3312,
"eval_samples_per_second": 126.446,
"eval_steps_per_second": 3.956,
"eval_weighted-f1": 0.611920153364688,
"eval_weighted-precision": 0.6153916861013311,
"eval_weighted-recall": 0.6148867313915858,
"step": 2784
},
{
"epoch": 12.93,
"learning_rate": 1.767241379310345e-05,
"loss": 0.0677,
"step": 3000
},
{
"epoch": 13.0,
"eval_accuracy": 0.627831715210356,
"eval_loss": 2.1390113830566406,
"eval_macro-f1": 0.5765388673761228,
"eval_macro-precision": 0.5891444176758638,
"eval_macro-recall": 0.5793730614376024,
"eval_runtime": 7.2808,
"eval_samples_per_second": 127.321,
"eval_steps_per_second": 3.983,
"eval_weighted-f1": 0.6268062992828414,
"eval_weighted-precision": 0.6385775273222236,
"eval_weighted-recall": 0.627831715210356,
"step": 3016
},
{
"epoch": 14.0,
"eval_accuracy": 0.6440129449838188,
"eval_loss": 2.148944854736328,
"eval_macro-f1": 0.6041323498671228,
"eval_macro-precision": 0.6330168044904465,
"eval_macro-recall": 0.5906969006063283,
"eval_runtime": 7.3225,
"eval_samples_per_second": 126.597,
"eval_steps_per_second": 3.96,
"eval_weighted-f1": 0.6385550648663262,
"eval_weighted-precision": 0.645638414060538,
"eval_weighted-recall": 0.6440129449838188,
"step": 3248
},
{
"epoch": 15.0,
"eval_accuracy": 0.6353829557713053,
"eval_loss": 2.176727056503296,
"eval_macro-f1": 0.5855092053952029,
"eval_macro-precision": 0.6205452521876899,
"eval_macro-recall": 0.5749431187067001,
"eval_runtime": 7.2634,
"eval_samples_per_second": 127.627,
"eval_steps_per_second": 3.993,
"eval_weighted-f1": 0.6299837224200732,
"eval_weighted-precision": 0.6440851231945072,
"eval_weighted-recall": 0.6353829557713053,
"step": 3480
},
{
"epoch": 15.09,
"learning_rate": 1.228448275862069e-05,
"loss": 0.0481,
"step": 3500
},
{
"epoch": 16.0,
"eval_accuracy": 0.6343042071197411,
"eval_loss": 2.2005436420440674,
"eval_macro-f1": 0.5953777118862926,
"eval_macro-precision": 0.6279439331751326,
"eval_macro-recall": 0.5827314498455615,
"eval_runtime": 7.3585,
"eval_samples_per_second": 125.977,
"eval_steps_per_second": 3.941,
"eval_weighted-f1": 0.6295400705432875,
"eval_weighted-precision": 0.6407972206921422,
"eval_weighted-recall": 0.6343042071197411,
"step": 3712
},
{
"epoch": 17.0,
"eval_accuracy": 0.6353829557713053,
"eval_loss": 2.2075998783111572,
"eval_macro-f1": 0.5922748273767572,
"eval_macro-precision": 0.6185327701706522,
"eval_macro-recall": 0.5831902307851583,
"eval_runtime": 7.315,
"eval_samples_per_second": 126.726,
"eval_steps_per_second": 3.964,
"eval_weighted-f1": 0.6292221063855338,
"eval_weighted-precision": 0.640744637974118,
"eval_weighted-recall": 0.6353829557713053,
"step": 3944
},
{
"epoch": 17.24,
"learning_rate": 6.896551724137932e-06,
"loss": 0.0344,
"step": 4000
},
{
"epoch": 18.0,
"eval_accuracy": 0.639697950377562,
"eval_loss": 2.186396598815918,
"eval_macro-f1": 0.5937344000879472,
"eval_macro-precision": 0.6082172147504935,
"eval_macro-recall": 0.586912298660763,
"eval_runtime": 7.2683,
"eval_samples_per_second": 127.54,
"eval_steps_per_second": 3.99,
"eval_weighted-f1": 0.6356937811215412,
"eval_weighted-precision": 0.6386402558809051,
"eval_weighted-recall": 0.639697950377562,
"step": 4176
},
{
"epoch": 19.0,
"eval_accuracy": 0.6461704422869471,
"eval_loss": 2.172349691390991,
"eval_macro-f1": 0.6048666691904897,
"eval_macro-precision": 0.6140548549534073,
"eval_macro-recall": 0.6011637605520835,
"eval_runtime": 7.3386,
"eval_samples_per_second": 126.318,
"eval_steps_per_second": 3.952,
"eval_weighted-f1": 0.6432377710619339,
"eval_weighted-precision": 0.6448782718814935,
"eval_weighted-recall": 0.6461704422869471,
"step": 4408
},
{
"epoch": 19.4,
"learning_rate": 1.5086206896551726e-06,
"loss": 0.0272,
"step": 4500
},
{
"epoch": 20.0,
"eval_accuracy": 0.6461704422869471,
"eval_loss": 2.182253837585449,
"eval_macro-f1": 0.6057453163676313,
"eval_macro-precision": 0.6251264122566944,
"eval_macro-recall": 0.5956135892129149,
"eval_runtime": 7.2862,
"eval_samples_per_second": 127.227,
"eval_steps_per_second": 3.98,
"eval_weighted-f1": 0.6423371190506697,
"eval_weighted-precision": 0.645041986434426,
"eval_weighted-recall": 0.6461704422869471,
"step": 4640
}
],
"logging_steps": 500,
"max_steps": 4640,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 3.90597117566976e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}