lombardata's picture
🍻 cheers
a22c567 verified
raw
history blame
No virus
40.6 kB
{
"best_metric": 0.08695908635854721,
"best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/dinov2-large-2024_01_24-with_data_aug_batch-size32_epochs85_freeze/checkpoint-22742",
"epoch": 85.0,
"eval_steps": 500,
"global_step": 23290,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.45894224077940154,
"eval_f1_macro": 0.6395389989693074,
"eval_f1_micro": 0.7737575503857426,
"eval_loss": 0.13585977256298065,
"eval_roc_auc": 0.8471240403763409,
"eval_runtime": 675.8068,
"eval_samples_per_second": 4.253,
"eval_steps_per_second": 0.133,
"learning_rate": 0.001,
"step": 274
},
{
"epoch": 1.82,
"learning_rate": 0.001,
"loss": 0.2459,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.4940848990953375,
"eval_f1_macro": 0.7304998296932924,
"eval_f1_micro": 0.8032231694499591,
"eval_loss": 0.12362784147262573,
"eval_roc_auc": 0.8697341470820456,
"eval_runtime": 678.2974,
"eval_samples_per_second": 4.237,
"eval_steps_per_second": 0.133,
"learning_rate": 0.001,
"step": 548
},
{
"epoch": 3.0,
"eval_accuracy": 0.5125260960334029,
"eval_f1_macro": 0.7426440054746392,
"eval_f1_micro": 0.8174202432866652,
"eval_loss": 0.11671263724565506,
"eval_roc_auc": 0.8827824537503088,
"eval_runtime": 674.2849,
"eval_samples_per_second": 4.262,
"eval_steps_per_second": 0.133,
"learning_rate": 0.001,
"step": 822
},
{
"epoch": 3.65,
"learning_rate": 0.001,
"loss": 0.1403,
"step": 1000
},
{
"epoch": 4.0,
"eval_accuracy": 0.5100904662491301,
"eval_f1_macro": 0.7481206268648029,
"eval_f1_micro": 0.817623068527773,
"eval_loss": 0.11555441468954086,
"eval_roc_auc": 0.8825597364016536,
"eval_runtime": 684.1218,
"eval_samples_per_second": 4.201,
"eval_steps_per_second": 0.132,
"learning_rate": 0.001,
"step": 1096
},
{
"epoch": 5.0,
"eval_accuracy": 0.5243562978427279,
"eval_f1_macro": 0.7614020034586013,
"eval_f1_micro": 0.8267689489351958,
"eval_loss": 0.11359219998121262,
"eval_roc_auc": 0.8886760312325277,
"eval_runtime": 674.0166,
"eval_samples_per_second": 4.264,
"eval_steps_per_second": 0.134,
"learning_rate": 0.001,
"step": 1370
},
{
"epoch": 5.47,
"learning_rate": 0.001,
"loss": 0.1313,
"step": 1500
},
{
"epoch": 6.0,
"eval_accuracy": 0.5219206680584552,
"eval_f1_macro": 0.7508698006051816,
"eval_f1_micro": 0.8210489222998767,
"eval_loss": 0.11100047826766968,
"eval_roc_auc": 0.877677266975988,
"eval_runtime": 676.1,
"eval_samples_per_second": 4.251,
"eval_steps_per_second": 0.133,
"learning_rate": 0.001,
"step": 1644
},
{
"epoch": 7.0,
"eval_accuracy": 0.5323590814196242,
"eval_f1_macro": 0.7613673312506429,
"eval_f1_micro": 0.8288991092740292,
"eval_loss": 0.10846547037363052,
"eval_roc_auc": 0.8846228046955259,
"eval_runtime": 682.0096,
"eval_samples_per_second": 4.214,
"eval_steps_per_second": 0.132,
"learning_rate": 0.001,
"step": 1918
},
{
"epoch": 7.3,
"learning_rate": 0.001,
"loss": 0.1289,
"step": 2000
},
{
"epoch": 8.0,
"eval_accuracy": 0.5379262352122477,
"eval_f1_macro": 0.7711215001442554,
"eval_f1_micro": 0.8331729408434757,
"eval_loss": 0.11005302518606186,
"eval_roc_auc": 0.8958012673255937,
"eval_runtime": 682.26,
"eval_samples_per_second": 4.212,
"eval_steps_per_second": 0.132,
"learning_rate": 0.001,
"step": 2192
},
{
"epoch": 9.0,
"eval_accuracy": 0.5139178844815588,
"eval_f1_macro": 0.7669688558128348,
"eval_f1_micro": 0.8271255519076193,
"eval_loss": 0.11129175871610641,
"eval_roc_auc": 0.8924250608458335,
"eval_runtime": 683.3423,
"eval_samples_per_second": 4.206,
"eval_steps_per_second": 0.132,
"learning_rate": 0.001,
"step": 2466
},
{
"epoch": 9.12,
"learning_rate": 0.001,
"loss": 0.1268,
"step": 2500
},
{
"epoch": 10.0,
"eval_accuracy": 0.5313152400835073,
"eval_f1_macro": 0.7610925982620881,
"eval_f1_micro": 0.8258011503697616,
"eval_loss": 0.11381296068429947,
"eval_roc_auc": 0.880444980112697,
"eval_runtime": 679.9943,
"eval_samples_per_second": 4.227,
"eval_steps_per_second": 0.132,
"learning_rate": 0.001,
"step": 2740
},
{
"epoch": 10.95,
"learning_rate": 0.001,
"loss": 0.1255,
"step": 3000
},
{
"epoch": 11.0,
"eval_accuracy": 0.5260960334029228,
"eval_f1_macro": 0.762697586166308,
"eval_f1_micro": 0.8262265016047684,
"eval_loss": 0.11390296369791031,
"eval_roc_auc": 0.8880168466934987,
"eval_runtime": 678.1509,
"eval_samples_per_second": 4.238,
"eval_steps_per_second": 0.133,
"learning_rate": 0.001,
"step": 3014
},
{
"epoch": 12.0,
"eval_accuracy": 0.5337508698677801,
"eval_f1_macro": 0.7573087365131856,
"eval_f1_micro": 0.8210012500744092,
"eval_loss": 0.11208122968673706,
"eval_roc_auc": 0.8736066784464123,
"eval_runtime": 680.166,
"eval_samples_per_second": 4.225,
"eval_steps_per_second": 0.132,
"learning_rate": 0.001,
"step": 3288
},
{
"epoch": 12.77,
"learning_rate": 0.001,
"loss": 0.1253,
"step": 3500
},
{
"epoch": 13.0,
"eval_accuracy": 0.5219206680584552,
"eval_f1_macro": 0.7489136029171714,
"eval_f1_micro": 0.8207366032466399,
"eval_loss": 0.1110881045460701,
"eval_roc_auc": 0.8803454162802951,
"eval_runtime": 682.0648,
"eval_samples_per_second": 4.214,
"eval_steps_per_second": 0.132,
"learning_rate": 0.001,
"step": 3562
},
{
"epoch": 14.0,
"eval_accuracy": 0.5400139178844816,
"eval_f1_macro": 0.7776741330298375,
"eval_f1_micro": 0.8408186469584993,
"eval_loss": 0.10247301310300827,
"eval_roc_auc": 0.8987147268632997,
"eval_runtime": 676.5367,
"eval_samples_per_second": 4.248,
"eval_steps_per_second": 0.133,
"learning_rate": 0.0001,
"step": 3836
},
{
"epoch": 14.6,
"learning_rate": 0.0001,
"loss": 0.1171,
"step": 4000
},
{
"epoch": 15.0,
"eval_accuracy": 0.5403618649965205,
"eval_f1_macro": 0.7795139529876273,
"eval_f1_micro": 0.842865329512894,
"eval_loss": 0.0998576357960701,
"eval_roc_auc": 0.897277663148542,
"eval_runtime": 675.6889,
"eval_samples_per_second": 4.253,
"eval_steps_per_second": 0.133,
"learning_rate": 0.0001,
"step": 4110
},
{
"epoch": 16.0,
"eval_accuracy": 0.5407098121085595,
"eval_f1_macro": 0.7861162275453341,
"eval_f1_micro": 0.8462626605556499,
"eval_loss": 0.10081179440021515,
"eval_roc_auc": 0.9032963122022265,
"eval_runtime": 680.4113,
"eval_samples_per_second": 4.224,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 4384
},
{
"epoch": 16.42,
"learning_rate": 0.0001,
"loss": 0.1107,
"step": 4500
},
{
"epoch": 17.0,
"eval_accuracy": 0.545929018789144,
"eval_f1_macro": 0.7877890037679841,
"eval_f1_micro": 0.8474232610532244,
"eval_loss": 0.10136950016021729,
"eval_roc_auc": 0.9054715489545434,
"eval_runtime": 689.6336,
"eval_samples_per_second": 4.167,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 4658
},
{
"epoch": 18.0,
"eval_accuracy": 0.5480167014613778,
"eval_f1_macro": 0.7867996984352024,
"eval_f1_micro": 0.8471123755334281,
"eval_loss": 0.09731467068195343,
"eval_roc_auc": 0.9019535814277009,
"eval_runtime": 689.7429,
"eval_samples_per_second": 4.167,
"eval_steps_per_second": 0.13,
"learning_rate": 0.0001,
"step": 4932
},
{
"epoch": 18.25,
"learning_rate": 0.0001,
"loss": 0.1078,
"step": 5000
},
{
"epoch": 19.0,
"eval_accuracy": 0.5480167014613778,
"eval_f1_macro": 0.789354289479613,
"eval_f1_micro": 0.849087519068874,
"eval_loss": 0.09738590568304062,
"eval_roc_auc": 0.9053669532212902,
"eval_runtime": 687.0367,
"eval_samples_per_second": 4.183,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 5206
},
{
"epoch": 20.0,
"eval_accuracy": 0.5549756437021572,
"eval_f1_macro": 0.7947863154349663,
"eval_f1_micro": 0.8497521508745941,
"eval_loss": 0.0971071869134903,
"eval_roc_auc": 0.9029799344302967,
"eval_runtime": 693.4393,
"eval_samples_per_second": 4.145,
"eval_steps_per_second": 0.13,
"learning_rate": 0.0001,
"step": 5480
},
{
"epoch": 20.07,
"learning_rate": 0.0001,
"loss": 0.1061,
"step": 5500
},
{
"epoch": 21.0,
"eval_accuracy": 0.5532359081419624,
"eval_f1_macro": 0.793994619616555,
"eval_f1_micro": 0.850910726332359,
"eval_loss": 0.09643097966909409,
"eval_roc_auc": 0.908055677859469,
"eval_runtime": 689.9756,
"eval_samples_per_second": 4.165,
"eval_steps_per_second": 0.13,
"learning_rate": 0.0001,
"step": 5754
},
{
"epoch": 21.9,
"learning_rate": 0.0001,
"loss": 0.1048,
"step": 6000
},
{
"epoch": 22.0,
"eval_accuracy": 0.5563674321503131,
"eval_f1_macro": 0.7973736665550476,
"eval_f1_micro": 0.8519603424966201,
"eval_loss": 0.096234992146492,
"eval_roc_auc": 0.9079748210535556,
"eval_runtime": 688.8118,
"eval_samples_per_second": 4.172,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 6028
},
{
"epoch": 23.0,
"eval_accuracy": 0.558455114822547,
"eval_f1_macro": 0.7969454250638132,
"eval_f1_micro": 0.8504731861198739,
"eval_loss": 0.09601961821317673,
"eval_roc_auc": 0.9012155078858011,
"eval_runtime": 688.0026,
"eval_samples_per_second": 4.177,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 6302
},
{
"epoch": 23.72,
"learning_rate": 0.0001,
"loss": 0.1038,
"step": 6500
},
{
"epoch": 24.0,
"eval_accuracy": 0.5626304801670147,
"eval_f1_macro": 0.7974458635640262,
"eval_f1_micro": 0.8510467909850132,
"eval_loss": 0.09510745108127594,
"eval_roc_auc": 0.9024119192380319,
"eval_runtime": 688.423,
"eval_samples_per_second": 4.175,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 6576
},
{
"epoch": 25.0,
"eval_accuracy": 0.5643702157272095,
"eval_f1_macro": 0.795289513465328,
"eval_f1_micro": 0.8511713367018835,
"eval_loss": 0.0944407731294632,
"eval_roc_auc": 0.9012469687818218,
"eval_runtime": 683.8812,
"eval_samples_per_second": 4.202,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 6850
},
{
"epoch": 25.55,
"learning_rate": 0.0001,
"loss": 0.1017,
"step": 7000
},
{
"epoch": 26.0,
"eval_accuracy": 0.5640222686151705,
"eval_f1_macro": 0.8036711965439244,
"eval_f1_micro": 0.8572393605043909,
"eval_loss": 0.0948282852768898,
"eval_roc_auc": 0.9111790013806387,
"eval_runtime": 681.6858,
"eval_samples_per_second": 4.216,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 7124
},
{
"epoch": 27.0,
"eval_accuracy": 0.5636743215031316,
"eval_f1_macro": 0.8034638180358344,
"eval_f1_micro": 0.8551240743881069,
"eval_loss": 0.09229259192943573,
"eval_roc_auc": 0.9086109391822021,
"eval_runtime": 683.6776,
"eval_samples_per_second": 4.204,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 7398
},
{
"epoch": 27.37,
"learning_rate": 0.0001,
"loss": 0.1008,
"step": 7500
},
{
"epoch": 28.0,
"eval_accuracy": 0.5643702157272095,
"eval_f1_macro": 0.8072611584992022,
"eval_f1_micro": 0.8561391580259505,
"eval_loss": 0.0919216200709343,
"eval_roc_auc": 0.9083895171196321,
"eval_runtime": 676.936,
"eval_samples_per_second": 4.246,
"eval_steps_per_second": 0.133,
"learning_rate": 0.0001,
"step": 7672
},
{
"epoch": 29.0,
"eval_accuracy": 0.5681976339596382,
"eval_f1_macro": 0.807775544791943,
"eval_f1_micro": 0.8571590844550463,
"eval_loss": 0.09229801595211029,
"eval_roc_auc": 0.9081680950570622,
"eval_runtime": 680.3447,
"eval_samples_per_second": 4.224,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 7946
},
{
"epoch": 29.2,
"learning_rate": 0.0001,
"loss": 0.1006,
"step": 8000
},
{
"epoch": 30.0,
"eval_accuracy": 0.5636743215031316,
"eval_f1_macro": 0.8078629475879894,
"eval_f1_micro": 0.8560661454525001,
"eval_loss": 0.09243426471948624,
"eval_roc_auc": 0.9107996520688381,
"eval_runtime": 679.1764,
"eval_samples_per_second": 4.232,
"eval_steps_per_second": 0.133,
"learning_rate": 0.0001,
"step": 8220
},
{
"epoch": 31.0,
"eval_accuracy": 0.5688935281837161,
"eval_f1_macro": 0.8043753783436429,
"eval_f1_micro": 0.8549068890666057,
"eval_loss": 0.09250637888908386,
"eval_roc_auc": 0.9050076062220636,
"eval_runtime": 675.7031,
"eval_samples_per_second": 4.253,
"eval_steps_per_second": 0.133,
"learning_rate": 0.0001,
"step": 8494
},
{
"epoch": 31.02,
"learning_rate": 0.0001,
"loss": 0.0987,
"step": 8500
},
{
"epoch": 32.0,
"eval_accuracy": 0.5678496868475992,
"eval_f1_macro": 0.8071226305218325,
"eval_f1_micro": 0.858236685057989,
"eval_loss": 0.09133294969797134,
"eval_roc_auc": 0.9117040473456065,
"eval_runtime": 677.7385,
"eval_samples_per_second": 4.241,
"eval_steps_per_second": 0.133,
"learning_rate": 0.0001,
"step": 8768
},
{
"epoch": 32.85,
"learning_rate": 0.0001,
"loss": 0.0983,
"step": 9000
},
{
"epoch": 33.0,
"eval_accuracy": 0.5692414752957551,
"eval_f1_macro": 0.8081519622072744,
"eval_f1_micro": 0.8570938803496942,
"eval_loss": 0.09114891290664673,
"eval_roc_auc": 0.9061295874509765,
"eval_runtime": 681.1845,
"eval_samples_per_second": 4.219,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 9042
},
{
"epoch": 34.0,
"eval_accuracy": 0.5709812108559499,
"eval_f1_macro": 0.8059984375887345,
"eval_f1_micro": 0.8570447522032734,
"eval_loss": 0.09058225899934769,
"eval_roc_auc": 0.9055923606377748,
"eval_runtime": 681.0802,
"eval_samples_per_second": 4.22,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 9316
},
{
"epoch": 34.67,
"learning_rate": 0.0001,
"loss": 0.0967,
"step": 9500
},
{
"epoch": 35.0,
"eval_accuracy": 0.5692414752957551,
"eval_f1_macro": 0.8103551770491668,
"eval_f1_micro": 0.857759845428198,
"eval_loss": 0.09091359376907349,
"eval_roc_auc": 0.9083150869963146,
"eval_runtime": 683.7099,
"eval_samples_per_second": 4.204,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 9590
},
{
"epoch": 36.0,
"eval_accuracy": 0.5748086290883786,
"eval_f1_macro": 0.8114188986781382,
"eval_f1_micro": 0.8582166040314315,
"eval_loss": 0.09166968613862991,
"eval_roc_auc": 0.9079081626467485,
"eval_runtime": 677.0062,
"eval_samples_per_second": 4.245,
"eval_steps_per_second": 0.133,
"learning_rate": 0.0001,
"step": 9864
},
{
"epoch": 36.5,
"learning_rate": 0.0001,
"loss": 0.0963,
"step": 10000
},
{
"epoch": 37.0,
"eval_accuracy": 0.5741127348643006,
"eval_f1_macro": 0.8104359485439742,
"eval_f1_micro": 0.8571918983865431,
"eval_loss": 0.09075025469064713,
"eval_roc_auc": 0.9057496153700481,
"eval_runtime": 682.1714,
"eval_samples_per_second": 4.213,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 10138
},
{
"epoch": 38.0,
"eval_accuracy": 0.5709812108559499,
"eval_f1_macro": 0.8135949001200257,
"eval_f1_micro": 0.8594423033325777,
"eval_loss": 0.09104561805725098,
"eval_roc_auc": 0.9101469439602342,
"eval_runtime": 690.1946,
"eval_samples_per_second": 4.164,
"eval_steps_per_second": 0.13,
"learning_rate": 0.0001,
"step": 10412
},
{
"epoch": 38.32,
"learning_rate": 0.0001,
"loss": 0.0957,
"step": 10500
},
{
"epoch": 39.0,
"eval_accuracy": 0.5685455810716771,
"eval_f1_macro": 0.808520223441343,
"eval_f1_micro": 0.8577247270464444,
"eval_loss": 0.09074629843235016,
"eval_roc_auc": 0.9098080230513902,
"eval_runtime": 678.1058,
"eval_samples_per_second": 4.238,
"eval_steps_per_second": 0.133,
"learning_rate": 0.0001,
"step": 10686
},
{
"epoch": 40.0,
"eval_accuracy": 0.5730688935281837,
"eval_f1_macro": 0.8111504469893477,
"eval_f1_micro": 0.8592332123411979,
"eval_loss": 0.09030281752347946,
"eval_roc_auc": 0.909802268885752,
"eval_runtime": 695.8681,
"eval_samples_per_second": 4.13,
"eval_steps_per_second": 0.129,
"learning_rate": 0.0001,
"step": 10960
},
{
"epoch": 40.15,
"learning_rate": 0.0001,
"loss": 0.0953,
"step": 11000
},
{
"epoch": 41.0,
"eval_accuracy": 0.5716771050800278,
"eval_f1_macro": 0.8133805742659422,
"eval_f1_micro": 0.8586208856801775,
"eval_loss": 0.09064245969057083,
"eval_roc_auc": 0.9086828782290092,
"eval_runtime": 687.8411,
"eval_samples_per_second": 4.178,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 11234
},
{
"epoch": 41.97,
"learning_rate": 0.0001,
"loss": 0.0943,
"step": 11500
},
{
"epoch": 42.0,
"eval_accuracy": 0.5664578983994433,
"eval_f1_macro": 0.8135815799291138,
"eval_f1_micro": 0.8584246692032484,
"eval_loss": 0.09031981229782104,
"eval_roc_auc": 0.9089139403154726,
"eval_runtime": 684.2332,
"eval_samples_per_second": 4.2,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 11508
},
{
"epoch": 43.0,
"eval_accuracy": 0.569937369519833,
"eval_f1_macro": 0.8177715667121555,
"eval_f1_micro": 0.8603735373537355,
"eval_loss": 0.09048929065465927,
"eval_roc_auc": 0.9131758350455123,
"eval_runtime": 683.871,
"eval_samples_per_second": 4.203,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 11782
},
{
"epoch": 43.8,
"learning_rate": 0.0001,
"loss": 0.0947,
"step": 12000
},
{
"epoch": 44.0,
"eval_accuracy": 0.5727209464161448,
"eval_f1_macro": 0.8149031816603105,
"eval_f1_micro": 0.8585443759981747,
"eval_loss": 0.090988889336586,
"eval_roc_auc": 0.9075230591693096,
"eval_runtime": 686.4073,
"eval_samples_per_second": 4.187,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 12056
},
{
"epoch": 45.0,
"eval_accuracy": 0.5727209464161448,
"eval_f1_macro": 0.8112945515986235,
"eval_f1_micro": 0.8590971272229823,
"eval_loss": 0.09051001071929932,
"eval_roc_auc": 0.9080583679272985,
"eval_runtime": 690.3088,
"eval_samples_per_second": 4.163,
"eval_steps_per_second": 0.13,
"learning_rate": 0.0001,
"step": 12330
},
{
"epoch": 45.62,
"learning_rate": 0.0001,
"loss": 0.0925,
"step": 12500
},
{
"epoch": 46.0,
"eval_accuracy": 0.5727209464161448,
"eval_f1_macro": 0.8138956921603455,
"eval_f1_micro": 0.8608370193943518,
"eval_loss": 0.08959119021892548,
"eval_roc_auc": 0.9107387478276688,
"eval_runtime": 684.5538,
"eval_samples_per_second": 4.198,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 12604
},
{
"epoch": 47.0,
"eval_accuracy": 0.5744606819763396,
"eval_f1_macro": 0.8154159530277365,
"eval_f1_micro": 0.8598835217540253,
"eval_loss": 0.08953865617513657,
"eval_roc_auc": 0.9079274426945352,
"eval_runtime": 681.6068,
"eval_samples_per_second": 4.217,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 12878
},
{
"epoch": 47.45,
"learning_rate": 0.0001,
"loss": 0.0928,
"step": 13000
},
{
"epoch": 48.0,
"eval_accuracy": 0.5744606819763396,
"eval_f1_macro": 0.8154966869589858,
"eval_f1_micro": 0.8605536922289807,
"eval_loss": 0.08962185680866241,
"eval_roc_auc": 0.9097631357688805,
"eval_runtime": 684.997,
"eval_samples_per_second": 4.196,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 13152
},
{
"epoch": 49.0,
"eval_accuracy": 0.5727209464161448,
"eval_f1_macro": 0.8168754926591527,
"eval_f1_micro": 0.8606169781580725,
"eval_loss": 0.08909053355455399,
"eval_roc_auc": 0.9130853382157057,
"eval_runtime": 683.2092,
"eval_samples_per_second": 4.207,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 13426
},
{
"epoch": 49.27,
"learning_rate": 0.0001,
"loss": 0.0914,
"step": 13500
},
{
"epoch": 50.0,
"eval_accuracy": 0.5734168406402227,
"eval_f1_macro": 0.8182687784925751,
"eval_f1_micro": 0.8616618652205841,
"eval_loss": 0.08951092511415482,
"eval_roc_auc": 0.9125141096821429,
"eval_runtime": 683.8641,
"eval_samples_per_second": 4.203,
"eval_steps_per_second": 0.132,
"learning_rate": 0.0001,
"step": 13700
},
{
"epoch": 51.0,
"eval_accuracy": 0.5668058455114823,
"eval_f1_macro": 0.8184177894108883,
"eval_f1_micro": 0.8608232987958555,
"eval_loss": 0.09029122442007065,
"eval_roc_auc": 0.914931294083072,
"eval_runtime": 685.4274,
"eval_samples_per_second": 4.193,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 13974
},
{
"epoch": 51.09,
"learning_rate": 0.0001,
"loss": 0.0919,
"step": 14000
},
{
"epoch": 52.0,
"eval_accuracy": 0.5762004175365344,
"eval_f1_macro": 0.8172163352414866,
"eval_f1_micro": 0.8617045454545454,
"eval_loss": 0.09041330218315125,
"eval_roc_auc": 0.9105776569849702,
"eval_runtime": 686.3022,
"eval_samples_per_second": 4.188,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 14248
},
{
"epoch": 52.92,
"learning_rate": 0.0001,
"loss": 0.091,
"step": 14500
},
{
"epoch": 53.0,
"eval_accuracy": 0.5734168406402227,
"eval_f1_macro": 0.8154347454270638,
"eval_f1_micro": 0.8604036655984708,
"eval_loss": 0.09106075763702393,
"eval_roc_auc": 0.913401765735465,
"eval_runtime": 686.9936,
"eval_samples_per_second": 4.183,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 14522
},
{
"epoch": 54.0,
"eval_accuracy": 0.5751565762004175,
"eval_f1_macro": 0.822392587875712,
"eval_f1_micro": 0.8628963639457711,
"eval_loss": 0.09085189551115036,
"eval_roc_auc": 0.9117971844954131,
"eval_runtime": 691.549,
"eval_samples_per_second": 4.156,
"eval_steps_per_second": 0.13,
"learning_rate": 0.0001,
"step": 14796
},
{
"epoch": 54.74,
"learning_rate": 0.0001,
"loss": 0.0907,
"step": 15000
},
{
"epoch": 55.0,
"eval_accuracy": 0.5720250521920668,
"eval_f1_macro": 0.8246722143238872,
"eval_f1_micro": 0.862824401752612,
"eval_loss": 0.0893503949046135,
"eval_roc_auc": 0.9150558423810694,
"eval_runtime": 687.0743,
"eval_samples_per_second": 4.183,
"eval_steps_per_second": 0.131,
"learning_rate": 0.0001,
"step": 15070
},
{
"epoch": 56.0,
"eval_accuracy": 0.5723729993041058,
"eval_f1_macro": 0.8197285299784532,
"eval_f1_micro": 0.8613505337062617,
"eval_loss": 0.0895121842622757,
"eval_roc_auc": 0.9088388874230271,
"eval_runtime": 688.6878,
"eval_samples_per_second": 4.173,
"eval_steps_per_second": 0.131,
"learning_rate": 1e-05,
"step": 15344
},
{
"epoch": 56.57,
"learning_rate": 1e-05,
"loss": 0.0883,
"step": 15500
},
{
"epoch": 57.0,
"eval_accuracy": 0.5755045233124565,
"eval_f1_macro": 0.8261680546876228,
"eval_f1_micro": 0.8653240324032403,
"eval_loss": 0.08795319497585297,
"eval_roc_auc": 0.9159717957369441,
"eval_runtime": 680.4805,
"eval_samples_per_second": 4.223,
"eval_steps_per_second": 0.132,
"learning_rate": 1e-05,
"step": 15618
},
{
"epoch": 58.0,
"eval_accuracy": 0.5782881002087683,
"eval_f1_macro": 0.8227228870436498,
"eval_f1_micro": 0.8639262127078114,
"eval_loss": 0.08846761286258698,
"eval_roc_auc": 0.9111322457907458,
"eval_runtime": 678.456,
"eval_samples_per_second": 4.236,
"eval_steps_per_second": 0.133,
"learning_rate": 1e-05,
"step": 15892
},
{
"epoch": 58.39,
"learning_rate": 1e-05,
"loss": 0.0872,
"step": 16000
},
{
"epoch": 59.0,
"eval_accuracy": 0.5765483646485734,
"eval_f1_macro": 0.8262742568594247,
"eval_f1_micro": 0.8655003656409969,
"eval_loss": 0.0878983661532402,
"eval_roc_auc": 0.9160905401214736,
"eval_runtime": 680.5904,
"eval_samples_per_second": 4.223,
"eval_steps_per_second": 0.132,
"learning_rate": 1e-05,
"step": 16166
},
{
"epoch": 60.0,
"eval_accuracy": 0.5800278357689631,
"eval_f1_macro": 0.8238378094426198,
"eval_f1_micro": 0.8654139156932453,
"eval_loss": 0.08844566345214844,
"eval_roc_auc": 0.914969231409518,
"eval_runtime": 682.0838,
"eval_samples_per_second": 4.214,
"eval_steps_per_second": 0.132,
"learning_rate": 1e-05,
"step": 16440
},
{
"epoch": 60.22,
"learning_rate": 1e-05,
"loss": 0.0873,
"step": 16500
},
{
"epoch": 61.0,
"eval_accuracy": 0.5744606819763396,
"eval_f1_macro": 0.8265572971487117,
"eval_f1_micro": 0.8651893408134642,
"eval_loss": 0.0878659188747406,
"eval_roc_auc": 0.9168337948077135,
"eval_runtime": 683.217,
"eval_samples_per_second": 4.207,
"eval_steps_per_second": 0.132,
"learning_rate": 1e-05,
"step": 16714
},
{
"epoch": 62.0,
"eval_accuracy": 0.5765483646485734,
"eval_f1_macro": 0.8251828516128455,
"eval_f1_micro": 0.8649870071178397,
"eval_loss": 0.08799029141664505,
"eval_roc_auc": 0.9143652466938494,
"eval_runtime": 680.2736,
"eval_samples_per_second": 4.225,
"eval_steps_per_second": 0.132,
"learning_rate": 1e-05,
"step": 16988
},
{
"epoch": 62.04,
"learning_rate": 1e-05,
"loss": 0.0864,
"step": 17000
},
{
"epoch": 63.0,
"eval_accuracy": 0.5800278357689631,
"eval_f1_macro": 0.8266891115852992,
"eval_f1_micro": 0.8650424929178471,
"eval_loss": 0.08828118443489075,
"eval_roc_auc": 0.9134011927141672,
"eval_runtime": 677.3735,
"eval_samples_per_second": 4.243,
"eval_steps_per_second": 0.133,
"learning_rate": 1e-05,
"step": 17262
},
{
"epoch": 63.87,
"learning_rate": 1e-05,
"loss": 0.086,
"step": 17500
},
{
"epoch": 64.0,
"eval_accuracy": 0.5782881002087683,
"eval_f1_macro": 0.8256635970178378,
"eval_f1_micro": 0.8667077889306342,
"eval_loss": 0.08754145354032516,
"eval_roc_auc": 0.9178472944451183,
"eval_runtime": 682.5828,
"eval_samples_per_second": 4.21,
"eval_steps_per_second": 0.132,
"learning_rate": 1e-05,
"step": 17536
},
{
"epoch": 65.0,
"eval_accuracy": 0.58107167710508,
"eval_f1_macro": 0.8277460823758025,
"eval_f1_micro": 0.8669750648764526,
"eval_loss": 0.08722905069589615,
"eval_roc_auc": 0.9159442206991787,
"eval_runtime": 673.5072,
"eval_samples_per_second": 4.267,
"eval_steps_per_second": 0.134,
"learning_rate": 1e-05,
"step": 17810
},
{
"epoch": 65.69,
"learning_rate": 1e-05,
"loss": 0.0855,
"step": 18000
},
{
"epoch": 66.0,
"eval_accuracy": 0.581767571329158,
"eval_f1_macro": 0.8263083392061107,
"eval_f1_micro": 0.8662405972512867,
"eval_loss": 0.0872766524553299,
"eval_roc_auc": 0.9146675753101624,
"eval_runtime": 674.2325,
"eval_samples_per_second": 4.263,
"eval_steps_per_second": 0.133,
"learning_rate": 1e-05,
"step": 18084
},
{
"epoch": 67.0,
"eval_accuracy": 0.5796798886569241,
"eval_f1_macro": 0.8236507380069967,
"eval_f1_micro": 0.8647603888351997,
"eval_loss": 0.08779256045818329,
"eval_roc_auc": 0.9121142845321298,
"eval_runtime": 672.7686,
"eval_samples_per_second": 4.272,
"eval_steps_per_second": 0.134,
"learning_rate": 1e-05,
"step": 18358
},
{
"epoch": 67.52,
"learning_rate": 1e-05,
"loss": 0.0853,
"step": 18500
},
{
"epoch": 68.0,
"eval_accuracy": 0.580723729993041,
"eval_f1_macro": 0.82334160354742,
"eval_f1_micro": 0.8644058136221144,
"eval_loss": 0.08787883818149567,
"eval_roc_auc": 0.9110366175644288,
"eval_runtime": 678.5717,
"eval_samples_per_second": 4.235,
"eval_steps_per_second": 0.133,
"learning_rate": 1e-05,
"step": 18632
},
{
"epoch": 69.0,
"eval_accuracy": 0.5831593597773138,
"eval_f1_macro": 0.8274164123414606,
"eval_f1_micro": 0.8653988078342322,
"eval_loss": 0.08730249851942062,
"eval_roc_auc": 0.9129307238034322,
"eval_runtime": 682.302,
"eval_samples_per_second": 4.212,
"eval_steps_per_second": 0.132,
"learning_rate": 1e-05,
"step": 18906
},
{
"epoch": 69.34,
"learning_rate": 1e-05,
"loss": 0.0854,
"step": 19000
},
{
"epoch": 70.0,
"eval_accuracy": 0.58107167710508,
"eval_f1_macro": 0.8286701109278063,
"eval_f1_micro": 0.8661381908135155,
"eval_loss": 0.08733326941728592,
"eval_roc_auc": 0.9166425383550794,
"eval_runtime": 673.3186,
"eval_samples_per_second": 4.268,
"eval_steps_per_second": 0.134,
"learning_rate": 1e-05,
"step": 19180
},
{
"epoch": 71.0,
"eval_accuracy": 0.5779401530967293,
"eval_f1_macro": 0.8262073521627441,
"eval_f1_micro": 0.865708650324035,
"eval_loss": 0.08731996268033981,
"eval_roc_auc": 0.9155950369973136,
"eval_runtime": 672.8744,
"eval_samples_per_second": 4.271,
"eval_steps_per_second": 0.134,
"learning_rate": 1e-05,
"step": 19454
},
{
"epoch": 71.17,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0847,
"step": 19500
},
{
"epoch": 72.0,
"eval_accuracy": 0.5803757828810021,
"eval_f1_macro": 0.8279492189021646,
"eval_f1_micro": 0.8660418654245468,
"eval_loss": 0.08729101717472076,
"eval_roc_auc": 0.9172015860404081,
"eval_runtime": 676.9231,
"eval_samples_per_second": 4.246,
"eval_steps_per_second": 0.133,
"learning_rate": 1.0000000000000002e-06,
"step": 19728
},
{
"epoch": 72.99,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0852,
"step": 20000
},
{
"epoch": 73.0,
"eval_accuracy": 0.5765483646485734,
"eval_f1_macro": 0.8258696329291023,
"eval_f1_micro": 0.8661956034096008,
"eval_loss": 0.08899407833814621,
"eval_roc_auc": 0.917537916377082,
"eval_runtime": 674.8648,
"eval_samples_per_second": 4.259,
"eval_steps_per_second": 0.133,
"learning_rate": 1.0000000000000002e-06,
"step": 20002
},
{
"epoch": 74.0,
"eval_accuracy": 0.5835073068893528,
"eval_f1_macro": 0.8266751443826955,
"eval_f1_micro": 0.8663119764546072,
"eval_loss": 0.08706125617027283,
"eval_roc_auc": 0.9144583340958263,
"eval_runtime": 676.3788,
"eval_samples_per_second": 4.249,
"eval_steps_per_second": 0.133,
"learning_rate": 1.0000000000000002e-06,
"step": 20276
},
{
"epoch": 74.82,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0845,
"step": 20500
},
{
"epoch": 75.0,
"eval_accuracy": 0.5762004175365344,
"eval_f1_macro": 0.8242525331164202,
"eval_f1_micro": 0.8650994982806247,
"eval_loss": 0.08718431740999222,
"eval_roc_auc": 0.9151367489348123,
"eval_runtime": 674.1856,
"eval_samples_per_second": 4.263,
"eval_steps_per_second": 0.133,
"learning_rate": 1.0000000000000002e-06,
"step": 20550
},
{
"epoch": 76.0,
"eval_accuracy": 0.5775922059846903,
"eval_f1_macro": 0.8258404959868192,
"eval_f1_micro": 0.8660362490149724,
"eval_loss": 0.08712752908468246,
"eval_roc_auc": 0.9161823322373652,
"eval_runtime": 676.0536,
"eval_samples_per_second": 4.251,
"eval_steps_per_second": 0.133,
"learning_rate": 1.0000000000000002e-06,
"step": 20824
},
{
"epoch": 76.64,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0849,
"step": 21000
},
{
"epoch": 77.0,
"eval_accuracy": 0.5779401530967293,
"eval_f1_macro": 0.8262597281814207,
"eval_f1_micro": 0.8654561858576745,
"eval_loss": 0.08787967264652252,
"eval_roc_auc": 0.915242017185023,
"eval_runtime": 678.4216,
"eval_samples_per_second": 4.236,
"eval_steps_per_second": 0.133,
"learning_rate": 1.0000000000000002e-06,
"step": 21098
},
{
"epoch": 78.0,
"eval_accuracy": 0.5779401530967293,
"eval_f1_macro": 0.824064674812195,
"eval_f1_micro": 0.8647364849581541,
"eval_loss": 0.08832630515098572,
"eval_roc_auc": 0.9138800063627106,
"eval_runtime": 674.504,
"eval_samples_per_second": 4.261,
"eval_steps_per_second": 0.133,
"learning_rate": 1.0000000000000002e-06,
"step": 21372
},
{
"epoch": 78.47,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0853,
"step": 21500
},
{
"epoch": 79.0,
"eval_accuracy": 0.580723729993041,
"eval_f1_macro": 0.8283767069034536,
"eval_f1_micro": 0.8667153859126425,
"eval_loss": 0.08727473765611649,
"eval_roc_auc": 0.9170071162464759,
"eval_runtime": 680.1183,
"eval_samples_per_second": 4.226,
"eval_steps_per_second": 0.132,
"learning_rate": 1.0000000000000002e-06,
"step": 21646
},
{
"epoch": 80.0,
"eval_accuracy": 0.581419624217119,
"eval_f1_macro": 0.8257519474670673,
"eval_f1_micro": 0.8654216185625353,
"eval_loss": 0.08734780550003052,
"eval_roc_auc": 0.9139968326920274,
"eval_runtime": 682.9935,
"eval_samples_per_second": 4.208,
"eval_steps_per_second": 0.132,
"learning_rate": 1.0000000000000002e-06,
"step": 21920
},
{
"epoch": 80.29,
"learning_rate": 1.0000000000000002e-07,
"loss": 0.0838,
"step": 22000
},
{
"epoch": 81.0,
"eval_accuracy": 0.5828114126652749,
"eval_f1_macro": 0.8261813753948223,
"eval_f1_micro": 0.8653922514039366,
"eval_loss": 0.08708538860082626,
"eval_roc_auc": 0.9131951648411291,
"eval_runtime": 690.614,
"eval_samples_per_second": 4.162,
"eval_steps_per_second": 0.13,
"learning_rate": 1.0000000000000002e-07,
"step": 22194
},
{
"epoch": 82.0,
"eval_accuracy": 0.581767571329158,
"eval_f1_macro": 0.8253000981325144,
"eval_f1_micro": 0.866888801039137,
"eval_loss": 0.08740255981683731,
"eval_roc_auc": 0.9155308696670169,
"eval_runtime": 680.0034,
"eval_samples_per_second": 4.226,
"eval_steps_per_second": 0.132,
"learning_rate": 1.0000000000000002e-07,
"step": 22468
},
{
"epoch": 82.12,
"learning_rate": 1.0000000000000002e-07,
"loss": 0.0842,
"step": 22500
},
{
"epoch": 83.0,
"eval_accuracy": 0.5845511482254697,
"eval_f1_macro": 0.8282173993454429,
"eval_f1_micro": 0.8666929710839298,
"eval_loss": 0.08695908635854721,
"eval_roc_auc": 0.9160732278767293,
"eval_runtime": 685.2501,
"eval_samples_per_second": 4.194,
"eval_steps_per_second": 0.131,
"learning_rate": 1.0000000000000002e-07,
"step": 22742
},
{
"epoch": 83.94,
"learning_rate": 1.0000000000000002e-07,
"loss": 0.0837,
"step": 23000
},
{
"epoch": 84.0,
"eval_accuracy": 0.58107167710508,
"eval_f1_macro": 0.8233437650206237,
"eval_f1_micro": 0.8627316009866345,
"eval_loss": 0.08810650557279587,
"eval_roc_auc": 0.9079679208453217,
"eval_runtime": 687.4756,
"eval_samples_per_second": 4.181,
"eval_steps_per_second": 0.131,
"learning_rate": 1.0000000000000002e-07,
"step": 23016
},
{
"epoch": 85.0,
"eval_accuracy": 0.580723729993041,
"eval_f1_macro": 0.8276925304690478,
"eval_f1_micro": 0.8657459814353634,
"eval_loss": 0.08707784116268158,
"eval_roc_auc": 0.9141406112899818,
"eval_runtime": 688.8064,
"eval_samples_per_second": 4.172,
"eval_steps_per_second": 0.131,
"learning_rate": 1.0000000000000002e-07,
"step": 23290
},
{
"epoch": 85.0,
"learning_rate": 1.0000000000000002e-07,
"step": 23290,
"total_flos": 1.1045912459199104e+21,
"train_loss": 0.0019856175725272715,
"train_runtime": 5622.9029,
"train_samples_per_second": 132.483,
"train_steps_per_second": 4.142
}
],
"logging_steps": 500,
"max_steps": 23290,
"num_input_tokens_seen": 0,
"num_train_epochs": 85,
"save_steps": 500,
"total_flos": 1.1045912459199104e+21,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}