|
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"global_step": 350,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0,
|
|
"step": 0,
|
|
"train_loss": 0.9102265238761902
|
|
},
|
|
{
|
|
"epoch": 0,
|
|
"step": 0,
|
|
"train_loss": 0.7791410684585571
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.942857142857143e-05,
|
|
"loss": 0.6171,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"step": 20,
|
|
"train_loss": 0.9948083162307739
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"step": 20,
|
|
"train_loss": 0.43645456433296204
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.885714285714286e-05,
|
|
"loss": 0.5985,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"step": 40,
|
|
"train_loss": 0.6493304967880249
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"step": 40,
|
|
"train_loss": 0.44140735268592834
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.8285714285714288e-05,
|
|
"loss": 0.5388,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"step": 60,
|
|
"train_loss": 0.5935375690460205
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"step": 60,
|
|
"train_loss": 0.7153045535087585
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_accuracy": {
|
|
"accuracy": 0.8339285714285715
|
|
},
|
|
"eval_auc": 0.7155102040816327,
|
|
"eval_f1": {
|
|
"f1": 0.3404255319148936
|
|
},
|
|
"eval_loss": 0.6475747227668762,
|
|
"eval_precision": {
|
|
"precision": 0.3380281690140845
|
|
},
|
|
"eval_recall": {
|
|
"recall": 0.34285714285714286
|
|
},
|
|
"eval_runtime": 0.6529,
|
|
"eval_samples_per_second": 857.718,
|
|
"eval_steps_per_second": 53.607,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.7714285714285717e-05,
|
|
"loss": 0.5155,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"step": 80,
|
|
"train_loss": 0.30939409136772156
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"step": 80,
|
|
"train_loss": 0.46914660930633545
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.4106,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"step": 100,
|
|
"train_loss": 0.5445544719696045
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"step": 100,
|
|
"train_loss": 0.2606019377708435
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 1.6571428571428574e-05,
|
|
"loss": 0.4368,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"step": 120,
|
|
"train_loss": 0.3977287709712982
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"step": 120,
|
|
"train_loss": 0.27664005756378174
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 1.6000000000000003e-05,
|
|
"loss": 0.3439,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.281027227640152
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.8136294484138489
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.12487435340881348
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.4965817332267761
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.10273457318544388
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.4042325019836426
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.18423768877983093
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.2865528464317322
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.6649780869483948
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.187837615609169
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.5332860946655273
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.3017665147781372
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.17419536411762238
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.21602007746696472
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.19301216304302216
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.7078395485877991
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.2670217752456665
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.3781671524047852
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.1845088005065918
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.5374538898468018
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.8708707690238953
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.8791667222976685
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.30802324414253235
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.2370085716247559
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.2875761985778809
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.280977487564087
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.4434828758239746
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.9846087098121643
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.3032301068305969
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.22137752175331116
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.8634898066520691
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 1.080783486366272
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.8349682092666626
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.6795739531517029
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.676697850227356
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"eval_accuracy": {
|
|
"accuracy": 0.8428571428571429
|
|
},
|
|
"eval_auc": 0.7470845481049562,
|
|
"eval_f1": {
|
|
"f1": 0.3802816901408451
|
|
},
|
|
"eval_loss": 0.7140511870384216,
|
|
"eval_precision": {
|
|
"precision": 0.375
|
|
},
|
|
"eval_recall": {
|
|
"recall": 0.38571428571428573
|
|
},
|
|
"eval_runtime": 0.8142,
|
|
"eval_samples_per_second": 687.816,
|
|
"eval_steps_per_second": 42.989,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.18269318342208862
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"step": 140,
|
|
"train_loss": 0.16265031695365906
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 1.542857142857143e-05,
|
|
"loss": 0.22,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"step": 160,
|
|
"train_loss": 0.15252527594566345
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"step": 160,
|
|
"train_loss": 0.26980623602867126
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.4857142857142858e-05,
|
|
"loss": 0.2508,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"step": 180,
|
|
"train_loss": 0.1389356404542923
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"step": 180,
|
|
"train_loss": 0.07751139253377914
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.4285714285714287e-05,
|
|
"loss": 0.2149,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"step": 200,
|
|
"train_loss": 0.13591702282428741
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"step": 200,
|
|
"train_loss": 0.04827806353569031
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"eval_accuracy": {
|
|
"accuracy": 0.8232142857142857
|
|
},
|
|
"eval_auc": 0.7547521865889213,
|
|
"eval_f1": {
|
|
"f1": 0.3926380368098159
|
|
},
|
|
"eval_loss": 0.9247345924377441,
|
|
"eval_precision": {
|
|
"precision": 0.34408602150537637
|
|
},
|
|
"eval_recall": {
|
|
"recall": 0.45714285714285713
|
|
},
|
|
"eval_runtime": 0.6565,
|
|
"eval_samples_per_second": 852.978,
|
|
"eval_steps_per_second": 53.311,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 3.14,
|
|
"learning_rate": 1.3714285714285716e-05,
|
|
"loss": 0.2083,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 3.14,
|
|
"step": 220,
|
|
"train_loss": 0.08572366833686829
|
|
},
|
|
{
|
|
"epoch": 3.14,
|
|
"step": 220,
|
|
"train_loss": 0.16466383635997772
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"learning_rate": 1.3142857142857145e-05,
|
|
"loss": 0.1521,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"step": 240,
|
|
"train_loss": 0.4424760043621063
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"step": 240,
|
|
"train_loss": 0.0380471907556057
|
|
},
|
|
{
|
|
"epoch": 3.71,
|
|
"learning_rate": 1.2571428571428572e-05,
|
|
"loss": 0.1872,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 3.71,
|
|
"step": 260,
|
|
"train_loss": 0.4089410901069641
|
|
},
|
|
{
|
|
"epoch": 3.71,
|
|
"step": 260,
|
|
"train_loss": 0.2289683222770691
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"learning_rate": 1.2e-05,
|
|
"loss": 0.1486,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.4819692373275757
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.6059560775756836
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.02861696481704712
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.6295557618141174
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.011990266852080822
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.8871378302574158
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.96287602186203
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.368556946516037
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.9895154237747192
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.11174698173999786
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.6365806460380554
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.9894587993621826
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.27545443177223206
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.15115408599376678
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.1443568766117096
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.6999047994613647
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.7545350790023804
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 2.3427138328552246
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 2.2553696632385254
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 2.8793208599090576
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.293062686920166
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.2954611778259277
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.26605361700057983
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 2.2778561115264893
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 2.4143030643463135
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 2.61956524848938
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 2.9289610385894775
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.7221819162368774
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.3664304316043854
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.2494506984949112
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.011549472808838
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.9930472373962402
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.8549365997314453
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.246674656867981
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 1.2460875511169434
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_accuracy": {
|
|
"accuracy": 0.8375
|
|
},
|
|
"eval_auc": 0.7523032069970845,
|
|
"eval_f1": {
|
|
"f1": 0.35460992907801414
|
|
},
|
|
"eval_loss": 1.1712112426757812,
|
|
"eval_precision": {
|
|
"precision": 0.352112676056338
|
|
},
|
|
"eval_recall": {
|
|
"recall": 0.35714285714285715
|
|
},
|
|
"eval_runtime": 0.8151,
|
|
"eval_samples_per_second": 687.069,
|
|
"eval_steps_per_second": 42.942,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.1625460833311081
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 280,
|
|
"train_loss": 0.25639742612838745
|
|
},
|
|
{
|
|
"epoch": 4.29,
|
|
"learning_rate": 1.1428571428571429e-05,
|
|
"loss": 0.1546,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 4.29,
|
|
"step": 300,
|
|
"train_loss": 0.030452944338321686
|
|
},
|
|
{
|
|
"epoch": 4.29,
|
|
"step": 300,
|
|
"train_loss": 0.010571416467428207
|
|
},
|
|
{
|
|
"epoch": 4.57,
|
|
"learning_rate": 1.0857142857142858e-05,
|
|
"loss": 0.1151,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 4.57,
|
|
"step": 320,
|
|
"train_loss": 0.11510075628757477
|
|
},
|
|
{
|
|
"epoch": 4.57,
|
|
"step": 320,
|
|
"train_loss": 0.19758647680282593
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"learning_rate": 1.0285714285714285e-05,
|
|
"loss": 0.1334,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"step": 340,
|
|
"train_loss": 0.25014737248420715
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"step": 340,
|
|
"train_loss": 0.019543316215276718
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"eval_accuracy": {
|
|
"accuracy": 0.8482142857142857
|
|
},
|
|
"eval_auc": 0.7345772594752188,
|
|
"eval_f1": {
|
|
"f1": 0.32
|
|
},
|
|
"eval_loss": 1.4309738874435425,
|
|
"eval_precision": {
|
|
"precision": 0.36363636363636365
|
|
},
|
|
"eval_recall": {
|
|
"recall": 0.2857142857142857
|
|
},
|
|
"eval_runtime": 0.6857,
|
|
"eval_samples_per_second": 816.675,
|
|
"eval_steps_per_second": 51.042,
|
|
"step": 350
|
|
}
|
|
],
|
|
"max_steps": 700,
|
|
"num_train_epochs": 10,
|
|
"total_flos": 161884215698040.0,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|