|
{ |
|
"best_metric": 0.6057453163676313, |
|
"best_model_checkpoint": "../saved_model/tibetan-bert_tncc-title_v3/checkpoint-4640", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 4640, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5717367853290184, |
|
"eval_loss": 1.3059929609298706, |
|
"eval_macro-f1": 0.4599932337899495, |
|
"eval_macro-precision": 0.5198165980322329, |
|
"eval_macro-recall": 0.47524513594607626, |
|
"eval_runtime": 7.3569, |
|
"eval_samples_per_second": 126.004, |
|
"eval_steps_per_second": 3.942, |
|
"eval_weighted-f1": 0.5542294574300444, |
|
"eval_weighted-precision": 0.591566389333376, |
|
"eval_weighted-recall": 0.5717367853290184, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6148867313915858, |
|
"eval_loss": 1.2135106325149536, |
|
"eval_macro-f1": 0.5584398853408242, |
|
"eval_macro-precision": 0.5749611619851209, |
|
"eval_macro-recall": 0.5695895916407867, |
|
"eval_runtime": 7.3464, |
|
"eval_samples_per_second": 126.185, |
|
"eval_steps_per_second": 3.948, |
|
"eval_weighted-f1": 0.608666635280638, |
|
"eval_weighted-precision": 0.6187269920143599, |
|
"eval_weighted-recall": 0.6148867313915858, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.461206896551724e-05, |
|
"loss": 1.2716, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6170442286947141, |
|
"eval_loss": 1.2330070734024048, |
|
"eval_macro-f1": 0.5516603706806994, |
|
"eval_macro-precision": 0.575159910949547, |
|
"eval_macro-recall": 0.5503575961333385, |
|
"eval_runtime": 7.2744, |
|
"eval_samples_per_second": 127.433, |
|
"eval_steps_per_second": 3.987, |
|
"eval_weighted-f1": 0.6100508919269417, |
|
"eval_weighted-precision": 0.6247775875728347, |
|
"eval_weighted-recall": 0.6170442286947141, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6192017259978425, |
|
"eval_loss": 1.3646526336669922, |
|
"eval_macro-f1": 0.5686376693802021, |
|
"eval_macro-precision": 0.5941264267230656, |
|
"eval_macro-recall": 0.5623900177716177, |
|
"eval_runtime": 7.3362, |
|
"eval_samples_per_second": 126.359, |
|
"eval_steps_per_second": 3.953, |
|
"eval_weighted-f1": 0.6165925170578278, |
|
"eval_weighted-precision": 0.6295963103897588, |
|
"eval_weighted-recall": 0.6192017259978425, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 3.922413793103448e-05, |
|
"loss": 0.5952, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6256742179072277, |
|
"eval_loss": 1.435981035232544, |
|
"eval_macro-f1": 0.5864237155073179, |
|
"eval_macro-precision": 0.641734562145522, |
|
"eval_macro-recall": 0.5708896457267656, |
|
"eval_runtime": 7.3366, |
|
"eval_samples_per_second": 126.353, |
|
"eval_steps_per_second": 3.953, |
|
"eval_weighted-f1": 0.6206257136654274, |
|
"eval_weighted-precision": 0.6340957047257687, |
|
"eval_weighted-recall": 0.6256742179072277, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6213592233009708, |
|
"eval_loss": 1.639809489250183, |
|
"eval_macro-f1": 0.5665517506857142, |
|
"eval_macro-precision": 0.579234133027197, |
|
"eval_macro-recall": 0.5645134686065383, |
|
"eval_runtime": 7.3778, |
|
"eval_samples_per_second": 125.647, |
|
"eval_steps_per_second": 3.931, |
|
"eval_weighted-f1": 0.61941625450029, |
|
"eval_weighted-precision": 0.6256700095561613, |
|
"eval_weighted-recall": 0.6213592233009708, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 3.383620689655172e-05, |
|
"loss": 0.259, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6127292340884574, |
|
"eval_loss": 1.7420923709869385, |
|
"eval_macro-f1": 0.5600620804632168, |
|
"eval_macro-precision": 0.580446830474752, |
|
"eval_macro-recall": 0.5638504244142538, |
|
"eval_runtime": 7.3417, |
|
"eval_samples_per_second": 126.265, |
|
"eval_steps_per_second": 3.95, |
|
"eval_weighted-f1": 0.609536126843797, |
|
"eval_weighted-precision": 0.6248423019885316, |
|
"eval_weighted-recall": 0.6127292340884574, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6138079827400216, |
|
"eval_loss": 1.8710674047470093, |
|
"eval_macro-f1": 0.5822983345441143, |
|
"eval_macro-precision": 0.5948063750427987, |
|
"eval_macro-recall": 0.5794954217168365, |
|
"eval_runtime": 7.3262, |
|
"eval_samples_per_second": 126.532, |
|
"eval_steps_per_second": 3.958, |
|
"eval_weighted-f1": 0.6125939186272255, |
|
"eval_weighted-precision": 0.6180044167574804, |
|
"eval_weighted-recall": 0.6138079827400216, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 2.844827586206897e-05, |
|
"loss": 0.1433, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6084142394822006, |
|
"eval_loss": 1.9591827392578125, |
|
"eval_macro-f1": 0.5632816067487086, |
|
"eval_macro-precision": 0.5909968207291227, |
|
"eval_macro-recall": 0.5509900757560654, |
|
"eval_runtime": 7.3577, |
|
"eval_samples_per_second": 125.991, |
|
"eval_steps_per_second": 3.941, |
|
"eval_weighted-f1": 0.6058400058395833, |
|
"eval_weighted-precision": 0.615382962851156, |
|
"eval_weighted-recall": 0.6084142394822006, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.627831715210356, |
|
"eval_loss": 1.9844281673431396, |
|
"eval_macro-f1": 0.5720261323200381, |
|
"eval_macro-precision": 0.6115468809046185, |
|
"eval_macro-recall": 0.5521363552960614, |
|
"eval_runtime": 7.2941, |
|
"eval_samples_per_second": 127.089, |
|
"eval_steps_per_second": 3.976, |
|
"eval_weighted-f1": 0.6186046402157037, |
|
"eval_weighted-precision": 0.6241715352593074, |
|
"eval_weighted-recall": 0.627831715210356, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 2.306034482758621e-05, |
|
"loss": 0.0918, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.622437971952535, |
|
"eval_loss": 2.121650218963623, |
|
"eval_macro-f1": 0.575328889968513, |
|
"eval_macro-precision": 0.6046131450610978, |
|
"eval_macro-recall": 0.5644507417595815, |
|
"eval_runtime": 7.3602, |
|
"eval_samples_per_second": 125.948, |
|
"eval_steps_per_second": 3.94, |
|
"eval_weighted-f1": 0.6210201377968305, |
|
"eval_weighted-precision": 0.6318263677896466, |
|
"eval_weighted-recall": 0.622437971952535, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6148867313915858, |
|
"eval_loss": 2.1600427627563477, |
|
"eval_macro-f1": 0.5634622559142987, |
|
"eval_macro-precision": 0.5755339049051247, |
|
"eval_macro-recall": 0.5624852754080202, |
|
"eval_runtime": 7.3312, |
|
"eval_samples_per_second": 126.446, |
|
"eval_steps_per_second": 3.956, |
|
"eval_weighted-f1": 0.611920153364688, |
|
"eval_weighted-precision": 0.6153916861013311, |
|
"eval_weighted-recall": 0.6148867313915858, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 1.767241379310345e-05, |
|
"loss": 0.0677, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.627831715210356, |
|
"eval_loss": 2.1390113830566406, |
|
"eval_macro-f1": 0.5765388673761228, |
|
"eval_macro-precision": 0.5891444176758638, |
|
"eval_macro-recall": 0.5793730614376024, |
|
"eval_runtime": 7.2808, |
|
"eval_samples_per_second": 127.321, |
|
"eval_steps_per_second": 3.983, |
|
"eval_weighted-f1": 0.6268062992828414, |
|
"eval_weighted-precision": 0.6385775273222236, |
|
"eval_weighted-recall": 0.627831715210356, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6440129449838188, |
|
"eval_loss": 2.148944854736328, |
|
"eval_macro-f1": 0.6041323498671228, |
|
"eval_macro-precision": 0.6330168044904465, |
|
"eval_macro-recall": 0.5906969006063283, |
|
"eval_runtime": 7.3225, |
|
"eval_samples_per_second": 126.597, |
|
"eval_steps_per_second": 3.96, |
|
"eval_weighted-f1": 0.6385550648663262, |
|
"eval_weighted-precision": 0.645638414060538, |
|
"eval_weighted-recall": 0.6440129449838188, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6353829557713053, |
|
"eval_loss": 2.176727056503296, |
|
"eval_macro-f1": 0.5855092053952029, |
|
"eval_macro-precision": 0.6205452521876899, |
|
"eval_macro-recall": 0.5749431187067001, |
|
"eval_runtime": 7.2634, |
|
"eval_samples_per_second": 127.627, |
|
"eval_steps_per_second": 3.993, |
|
"eval_weighted-f1": 0.6299837224200732, |
|
"eval_weighted-precision": 0.6440851231945072, |
|
"eval_weighted-recall": 0.6353829557713053, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.228448275862069e-05, |
|
"loss": 0.0481, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6343042071197411, |
|
"eval_loss": 2.2005436420440674, |
|
"eval_macro-f1": 0.5953777118862926, |
|
"eval_macro-precision": 0.6279439331751326, |
|
"eval_macro-recall": 0.5827314498455615, |
|
"eval_runtime": 7.3585, |
|
"eval_samples_per_second": 125.977, |
|
"eval_steps_per_second": 3.941, |
|
"eval_weighted-f1": 0.6295400705432875, |
|
"eval_weighted-precision": 0.6407972206921422, |
|
"eval_weighted-recall": 0.6343042071197411, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6353829557713053, |
|
"eval_loss": 2.2075998783111572, |
|
"eval_macro-f1": 0.5922748273767572, |
|
"eval_macro-precision": 0.6185327701706522, |
|
"eval_macro-recall": 0.5831902307851583, |
|
"eval_runtime": 7.315, |
|
"eval_samples_per_second": 126.726, |
|
"eval_steps_per_second": 3.964, |
|
"eval_weighted-f1": 0.6292221063855338, |
|
"eval_weighted-precision": 0.640744637974118, |
|
"eval_weighted-recall": 0.6353829557713053, |
|
"step": 3944 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 6.896551724137932e-06, |
|
"loss": 0.0344, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.639697950377562, |
|
"eval_loss": 2.186396598815918, |
|
"eval_macro-f1": 0.5937344000879472, |
|
"eval_macro-precision": 0.6082172147504935, |
|
"eval_macro-recall": 0.586912298660763, |
|
"eval_runtime": 7.2683, |
|
"eval_samples_per_second": 127.54, |
|
"eval_steps_per_second": 3.99, |
|
"eval_weighted-f1": 0.6356937811215412, |
|
"eval_weighted-precision": 0.6386402558809051, |
|
"eval_weighted-recall": 0.639697950377562, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6461704422869471, |
|
"eval_loss": 2.172349691390991, |
|
"eval_macro-f1": 0.6048666691904897, |
|
"eval_macro-precision": 0.6140548549534073, |
|
"eval_macro-recall": 0.6011637605520835, |
|
"eval_runtime": 7.3386, |
|
"eval_samples_per_second": 126.318, |
|
"eval_steps_per_second": 3.952, |
|
"eval_weighted-f1": 0.6432377710619339, |
|
"eval_weighted-precision": 0.6448782718814935, |
|
"eval_weighted-recall": 0.6461704422869471, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 1.5086206896551726e-06, |
|
"loss": 0.0272, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6461704422869471, |
|
"eval_loss": 2.182253837585449, |
|
"eval_macro-f1": 0.6057453163676313, |
|
"eval_macro-precision": 0.6251264122566944, |
|
"eval_macro-recall": 0.5956135892129149, |
|
"eval_runtime": 7.2862, |
|
"eval_samples_per_second": 127.227, |
|
"eval_steps_per_second": 3.98, |
|
"eval_weighted-f1": 0.6423371190506697, |
|
"eval_weighted-precision": 0.645041986434426, |
|
"eval_weighted-recall": 0.6461704422869471, |
|
"step": 4640 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 3.90597117566976e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|