|
{ |
|
"best_metric": 0.08695908635854721, |
|
"best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/dinov2-large-2024_01_24-with_data_aug_batch-size32_epochs85_freeze/checkpoint-22742", |
|
"epoch": 93.0, |
|
"eval_steps": 500, |
|
"global_step": 25482, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.45894224077940154, |
|
"eval_f1_macro": 0.6395389989693074, |
|
"eval_f1_micro": 0.7737575503857426, |
|
"eval_loss": 0.13585977256298065, |
|
"eval_roc_auc": 0.8471240403763409, |
|
"eval_runtime": 675.8068, |
|
"eval_samples_per_second": 4.253, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.2459, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4940848990953375, |
|
"eval_f1_macro": 0.7304998296932924, |
|
"eval_f1_micro": 0.8032231694499591, |
|
"eval_loss": 0.12362784147262573, |
|
"eval_roc_auc": 0.8697341470820456, |
|
"eval_runtime": 678.2974, |
|
"eval_samples_per_second": 4.237, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5125260960334029, |
|
"eval_f1_macro": 0.7426440054746392, |
|
"eval_f1_micro": 0.8174202432866652, |
|
"eval_loss": 0.11671263724565506, |
|
"eval_roc_auc": 0.8827824537503088, |
|
"eval_runtime": 674.2849, |
|
"eval_samples_per_second": 4.262, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.001, |
|
"loss": 0.1403, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5100904662491301, |
|
"eval_f1_macro": 0.7481206268648029, |
|
"eval_f1_micro": 0.817623068527773, |
|
"eval_loss": 0.11555441468954086, |
|
"eval_roc_auc": 0.8825597364016536, |
|
"eval_runtime": 684.1218, |
|
"eval_samples_per_second": 4.201, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5243562978427279, |
|
"eval_f1_macro": 0.7614020034586013, |
|
"eval_f1_micro": 0.8267689489351958, |
|
"eval_loss": 0.11359219998121262, |
|
"eval_roc_auc": 0.8886760312325277, |
|
"eval_runtime": 674.0166, |
|
"eval_samples_per_second": 4.264, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.001, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.001, |
|
"loss": 0.1313, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5219206680584552, |
|
"eval_f1_macro": 0.7508698006051816, |
|
"eval_f1_micro": 0.8210489222998767, |
|
"eval_loss": 0.11100047826766968, |
|
"eval_roc_auc": 0.877677266975988, |
|
"eval_runtime": 676.1, |
|
"eval_samples_per_second": 4.251, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5323590814196242, |
|
"eval_f1_macro": 0.7613673312506429, |
|
"eval_f1_micro": 0.8288991092740292, |
|
"eval_loss": 0.10846547037363052, |
|
"eval_roc_auc": 0.8846228046955259, |
|
"eval_runtime": 682.0096, |
|
"eval_samples_per_second": 4.214, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 1918 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.1289, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5379262352122477, |
|
"eval_f1_macro": 0.7711215001442554, |
|
"eval_f1_micro": 0.8331729408434757, |
|
"eval_loss": 0.11005302518606186, |
|
"eval_roc_auc": 0.8958012673255937, |
|
"eval_runtime": 682.26, |
|
"eval_samples_per_second": 4.212, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5139178844815588, |
|
"eval_f1_macro": 0.7669688558128348, |
|
"eval_f1_micro": 0.8271255519076193, |
|
"eval_loss": 0.11129175871610641, |
|
"eval_roc_auc": 0.8924250608458335, |
|
"eval_runtime": 683.3423, |
|
"eval_samples_per_second": 4.206, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 2466 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.1268, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5313152400835073, |
|
"eval_f1_macro": 0.7610925982620881, |
|
"eval_f1_micro": 0.8258011503697616, |
|
"eval_loss": 0.11381296068429947, |
|
"eval_roc_auc": 0.880444980112697, |
|
"eval_runtime": 679.9943, |
|
"eval_samples_per_second": 4.227, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.1255, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5260960334029228, |
|
"eval_f1_macro": 0.762697586166308, |
|
"eval_f1_micro": 0.8262265016047684, |
|
"eval_loss": 0.11390296369791031, |
|
"eval_roc_auc": 0.8880168466934987, |
|
"eval_runtime": 678.1509, |
|
"eval_samples_per_second": 4.238, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 3014 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5337508698677801, |
|
"eval_f1_macro": 0.7573087365131856, |
|
"eval_f1_micro": 0.8210012500744092, |
|
"eval_loss": 0.11208122968673706, |
|
"eval_roc_auc": 0.8736066784464123, |
|
"eval_runtime": 680.166, |
|
"eval_samples_per_second": 4.225, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 3288 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.1253, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5219206680584552, |
|
"eval_f1_macro": 0.7489136029171714, |
|
"eval_f1_micro": 0.8207366032466399, |
|
"eval_loss": 0.1110881045460701, |
|
"eval_roc_auc": 0.8803454162802951, |
|
"eval_runtime": 682.0648, |
|
"eval_samples_per_second": 4.214, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5400139178844816, |
|
"eval_f1_macro": 0.7776741330298375, |
|
"eval_f1_micro": 0.8408186469584993, |
|
"eval_loss": 0.10247301310300827, |
|
"eval_roc_auc": 0.8987147268632997, |
|
"eval_runtime": 676.5367, |
|
"eval_samples_per_second": 4.248, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 3836 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1171, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5403618649965205, |
|
"eval_f1_macro": 0.7795139529876273, |
|
"eval_f1_micro": 0.842865329512894, |
|
"eval_loss": 0.0998576357960701, |
|
"eval_roc_auc": 0.897277663148542, |
|
"eval_runtime": 675.6889, |
|
"eval_samples_per_second": 4.253, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5407098121085595, |
|
"eval_f1_macro": 0.7861162275453341, |
|
"eval_f1_micro": 0.8462626605556499, |
|
"eval_loss": 0.10081179440021515, |
|
"eval_roc_auc": 0.9032963122022265, |
|
"eval_runtime": 680.4113, |
|
"eval_samples_per_second": 4.224, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 4384 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1107, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.545929018789144, |
|
"eval_f1_macro": 0.7877890037679841, |
|
"eval_f1_micro": 0.8474232610532244, |
|
"eval_loss": 0.10136950016021729, |
|
"eval_roc_auc": 0.9054715489545434, |
|
"eval_runtime": 689.6336, |
|
"eval_samples_per_second": 4.167, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 4658 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5480167014613778, |
|
"eval_f1_macro": 0.7867996984352024, |
|
"eval_f1_micro": 0.8471123755334281, |
|
"eval_loss": 0.09731467068195343, |
|
"eval_roc_auc": 0.9019535814277009, |
|
"eval_runtime": 689.7429, |
|
"eval_samples_per_second": 4.167, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.0001, |
|
"step": 4932 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1078, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5480167014613778, |
|
"eval_f1_macro": 0.789354289479613, |
|
"eval_f1_micro": 0.849087519068874, |
|
"eval_loss": 0.09738590568304062, |
|
"eval_roc_auc": 0.9053669532212902, |
|
"eval_runtime": 687.0367, |
|
"eval_samples_per_second": 4.183, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 5206 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5549756437021572, |
|
"eval_f1_macro": 0.7947863154349663, |
|
"eval_f1_micro": 0.8497521508745941, |
|
"eval_loss": 0.0971071869134903, |
|
"eval_roc_auc": 0.9029799344302967, |
|
"eval_runtime": 693.4393, |
|
"eval_samples_per_second": 4.145, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.0001, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 20.07, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1061, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5532359081419624, |
|
"eval_f1_macro": 0.793994619616555, |
|
"eval_f1_micro": 0.850910726332359, |
|
"eval_loss": 0.09643097966909409, |
|
"eval_roc_auc": 0.908055677859469, |
|
"eval_runtime": 689.9756, |
|
"eval_samples_per_second": 4.165, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.0001, |
|
"step": 5754 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1048, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5563674321503131, |
|
"eval_f1_macro": 0.7973736665550476, |
|
"eval_f1_micro": 0.8519603424966201, |
|
"eval_loss": 0.096234992146492, |
|
"eval_roc_auc": 0.9079748210535556, |
|
"eval_runtime": 688.8118, |
|
"eval_samples_per_second": 4.172, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 6028 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.558455114822547, |
|
"eval_f1_macro": 0.7969454250638132, |
|
"eval_f1_micro": 0.8504731861198739, |
|
"eval_loss": 0.09601961821317673, |
|
"eval_roc_auc": 0.9012155078858011, |
|
"eval_runtime": 688.0026, |
|
"eval_samples_per_second": 4.177, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 6302 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1038, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5626304801670147, |
|
"eval_f1_macro": 0.7974458635640262, |
|
"eval_f1_micro": 0.8510467909850132, |
|
"eval_loss": 0.09510745108127594, |
|
"eval_roc_auc": 0.9024119192380319, |
|
"eval_runtime": 688.423, |
|
"eval_samples_per_second": 4.175, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 6576 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5643702157272095, |
|
"eval_f1_macro": 0.795289513465328, |
|
"eval_f1_micro": 0.8511713367018835, |
|
"eval_loss": 0.0944407731294632, |
|
"eval_roc_auc": 0.9012469687818218, |
|
"eval_runtime": 683.8812, |
|
"eval_samples_per_second": 4.202, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 25.55, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1017, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5640222686151705, |
|
"eval_f1_macro": 0.8036711965439244, |
|
"eval_f1_micro": 0.8572393605043909, |
|
"eval_loss": 0.0948282852768898, |
|
"eval_roc_auc": 0.9111790013806387, |
|
"eval_runtime": 681.6858, |
|
"eval_samples_per_second": 4.216, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 7124 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5636743215031316, |
|
"eval_f1_macro": 0.8034638180358344, |
|
"eval_f1_micro": 0.8551240743881069, |
|
"eval_loss": 0.09229259192943573, |
|
"eval_roc_auc": 0.9086109391822021, |
|
"eval_runtime": 683.6776, |
|
"eval_samples_per_second": 4.204, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 7398 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1008, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5643702157272095, |
|
"eval_f1_macro": 0.8072611584992022, |
|
"eval_f1_micro": 0.8561391580259505, |
|
"eval_loss": 0.0919216200709343, |
|
"eval_roc_auc": 0.9083895171196321, |
|
"eval_runtime": 676.936, |
|
"eval_samples_per_second": 4.246, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 7672 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5681976339596382, |
|
"eval_f1_macro": 0.807775544791943, |
|
"eval_f1_micro": 0.8571590844550463, |
|
"eval_loss": 0.09229801595211029, |
|
"eval_roc_auc": 0.9081680950570622, |
|
"eval_runtime": 680.3447, |
|
"eval_samples_per_second": 4.224, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 7946 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1006, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5636743215031316, |
|
"eval_f1_macro": 0.8078629475879894, |
|
"eval_f1_micro": 0.8560661454525001, |
|
"eval_loss": 0.09243426471948624, |
|
"eval_roc_auc": 0.9107996520688381, |
|
"eval_runtime": 679.1764, |
|
"eval_samples_per_second": 4.232, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5688935281837161, |
|
"eval_f1_macro": 0.8043753783436429, |
|
"eval_f1_micro": 0.8549068890666057, |
|
"eval_loss": 0.09250637888908386, |
|
"eval_roc_auc": 0.9050076062220636, |
|
"eval_runtime": 675.7031, |
|
"eval_samples_per_second": 4.253, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 8494 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0987, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5678496868475992, |
|
"eval_f1_macro": 0.8071226305218325, |
|
"eval_f1_micro": 0.858236685057989, |
|
"eval_loss": 0.09133294969797134, |
|
"eval_roc_auc": 0.9117040473456065, |
|
"eval_runtime": 677.7385, |
|
"eval_samples_per_second": 4.241, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 8768 |
|
}, |
|
{ |
|
"epoch": 32.85, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0983, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.5692414752957551, |
|
"eval_f1_macro": 0.8081519622072744, |
|
"eval_f1_micro": 0.8570938803496942, |
|
"eval_loss": 0.09114891290664673, |
|
"eval_roc_auc": 0.9061295874509765, |
|
"eval_runtime": 681.1845, |
|
"eval_samples_per_second": 4.219, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 9042 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.5709812108559499, |
|
"eval_f1_macro": 0.8059984375887345, |
|
"eval_f1_micro": 0.8570447522032734, |
|
"eval_loss": 0.09058225899934769, |
|
"eval_roc_auc": 0.9055923606377748, |
|
"eval_runtime": 681.0802, |
|
"eval_samples_per_second": 4.22, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 9316 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0967, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.5692414752957551, |
|
"eval_f1_macro": 0.8103551770491668, |
|
"eval_f1_micro": 0.857759845428198, |
|
"eval_loss": 0.09091359376907349, |
|
"eval_roc_auc": 0.9083150869963146, |
|
"eval_runtime": 683.7099, |
|
"eval_samples_per_second": 4.204, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.5748086290883786, |
|
"eval_f1_macro": 0.8114188986781382, |
|
"eval_f1_micro": 0.8582166040314315, |
|
"eval_loss": 0.09166968613862991, |
|
"eval_roc_auc": 0.9079081626467485, |
|
"eval_runtime": 677.0062, |
|
"eval_samples_per_second": 4.245, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 9864 |
|
}, |
|
{ |
|
"epoch": 36.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0963, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.5741127348643006, |
|
"eval_f1_macro": 0.8104359485439742, |
|
"eval_f1_micro": 0.8571918983865431, |
|
"eval_loss": 0.09075025469064713, |
|
"eval_roc_auc": 0.9057496153700481, |
|
"eval_runtime": 682.1714, |
|
"eval_samples_per_second": 4.213, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 10138 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5709812108559499, |
|
"eval_f1_macro": 0.8135949001200257, |
|
"eval_f1_micro": 0.8594423033325777, |
|
"eval_loss": 0.09104561805725098, |
|
"eval_roc_auc": 0.9101469439602342, |
|
"eval_runtime": 690.1946, |
|
"eval_samples_per_second": 4.164, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.0001, |
|
"step": 10412 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0957, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5685455810716771, |
|
"eval_f1_macro": 0.808520223441343, |
|
"eval_f1_micro": 0.8577247270464444, |
|
"eval_loss": 0.09074629843235016, |
|
"eval_roc_auc": 0.9098080230513902, |
|
"eval_runtime": 678.1058, |
|
"eval_samples_per_second": 4.238, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 10686 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5730688935281837, |
|
"eval_f1_macro": 0.8111504469893477, |
|
"eval_f1_micro": 0.8592332123411979, |
|
"eval_loss": 0.09030281752347946, |
|
"eval_roc_auc": 0.909802268885752, |
|
"eval_runtime": 695.8681, |
|
"eval_samples_per_second": 4.13, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 0.0001, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 40.15, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0953, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5716771050800278, |
|
"eval_f1_macro": 0.8133805742659422, |
|
"eval_f1_micro": 0.8586208856801775, |
|
"eval_loss": 0.09064245969057083, |
|
"eval_roc_auc": 0.9086828782290092, |
|
"eval_runtime": 687.8411, |
|
"eval_samples_per_second": 4.178, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 11234 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0943, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5664578983994433, |
|
"eval_f1_macro": 0.8135815799291138, |
|
"eval_f1_micro": 0.8584246692032484, |
|
"eval_loss": 0.09031981229782104, |
|
"eval_roc_auc": 0.9089139403154726, |
|
"eval_runtime": 684.2332, |
|
"eval_samples_per_second": 4.2, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 11508 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.569937369519833, |
|
"eval_f1_macro": 0.8177715667121555, |
|
"eval_f1_micro": 0.8603735373537355, |
|
"eval_loss": 0.09048929065465927, |
|
"eval_roc_auc": 0.9131758350455123, |
|
"eval_runtime": 683.871, |
|
"eval_samples_per_second": 4.203, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 11782 |
|
}, |
|
{ |
|
"epoch": 43.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0947, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5727209464161448, |
|
"eval_f1_macro": 0.8149031816603105, |
|
"eval_f1_micro": 0.8585443759981747, |
|
"eval_loss": 0.090988889336586, |
|
"eval_roc_auc": 0.9075230591693096, |
|
"eval_runtime": 686.4073, |
|
"eval_samples_per_second": 4.187, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 12056 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5727209464161448, |
|
"eval_f1_macro": 0.8112945515986235, |
|
"eval_f1_micro": 0.8590971272229823, |
|
"eval_loss": 0.09051001071929932, |
|
"eval_roc_auc": 0.9080583679272985, |
|
"eval_runtime": 690.3088, |
|
"eval_samples_per_second": 4.163, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.0001, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 45.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0925, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.5727209464161448, |
|
"eval_f1_macro": 0.8138956921603455, |
|
"eval_f1_micro": 0.8608370193943518, |
|
"eval_loss": 0.08959119021892548, |
|
"eval_roc_auc": 0.9107387478276688, |
|
"eval_runtime": 684.5538, |
|
"eval_samples_per_second": 4.198, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 12604 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5744606819763396, |
|
"eval_f1_macro": 0.8154159530277365, |
|
"eval_f1_micro": 0.8598835217540253, |
|
"eval_loss": 0.08953865617513657, |
|
"eval_roc_auc": 0.9079274426945352, |
|
"eval_runtime": 681.6068, |
|
"eval_samples_per_second": 4.217, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 12878 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0928, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5744606819763396, |
|
"eval_f1_macro": 0.8154966869589858, |
|
"eval_f1_micro": 0.8605536922289807, |
|
"eval_loss": 0.08962185680866241, |
|
"eval_roc_auc": 0.9097631357688805, |
|
"eval_runtime": 684.997, |
|
"eval_samples_per_second": 4.196, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 13152 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5727209464161448, |
|
"eval_f1_macro": 0.8168754926591527, |
|
"eval_f1_micro": 0.8606169781580725, |
|
"eval_loss": 0.08909053355455399, |
|
"eval_roc_auc": 0.9130853382157057, |
|
"eval_runtime": 683.2092, |
|
"eval_samples_per_second": 4.207, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 13426 |
|
}, |
|
{ |
|
"epoch": 49.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0914, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5734168406402227, |
|
"eval_f1_macro": 0.8182687784925751, |
|
"eval_f1_micro": 0.8616618652205841, |
|
"eval_loss": 0.08951092511415482, |
|
"eval_roc_auc": 0.9125141096821429, |
|
"eval_runtime": 683.8641, |
|
"eval_samples_per_second": 4.203, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.5668058455114823, |
|
"eval_f1_macro": 0.8184177894108883, |
|
"eval_f1_micro": 0.8608232987958555, |
|
"eval_loss": 0.09029122442007065, |
|
"eval_roc_auc": 0.914931294083072, |
|
"eval_runtime": 685.4274, |
|
"eval_samples_per_second": 4.193, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 13974 |
|
}, |
|
{ |
|
"epoch": 51.09, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0919, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.5762004175365344, |
|
"eval_f1_macro": 0.8172163352414866, |
|
"eval_f1_micro": 0.8617045454545454, |
|
"eval_loss": 0.09041330218315125, |
|
"eval_roc_auc": 0.9105776569849702, |
|
"eval_runtime": 686.3022, |
|
"eval_samples_per_second": 4.188, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 14248 |
|
}, |
|
{ |
|
"epoch": 52.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.091, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.5734168406402227, |
|
"eval_f1_macro": 0.8154347454270638, |
|
"eval_f1_micro": 0.8604036655984708, |
|
"eval_loss": 0.09106075763702393, |
|
"eval_roc_auc": 0.913401765735465, |
|
"eval_runtime": 686.9936, |
|
"eval_samples_per_second": 4.183, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 14522 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.5751565762004175, |
|
"eval_f1_macro": 0.822392587875712, |
|
"eval_f1_micro": 0.8628963639457711, |
|
"eval_loss": 0.09085189551115036, |
|
"eval_roc_auc": 0.9117971844954131, |
|
"eval_runtime": 691.549, |
|
"eval_samples_per_second": 4.156, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.0001, |
|
"step": 14796 |
|
}, |
|
{ |
|
"epoch": 54.74, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0907, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.5720250521920668, |
|
"eval_f1_macro": 0.8246722143238872, |
|
"eval_f1_micro": 0.862824401752612, |
|
"eval_loss": 0.0893503949046135, |
|
"eval_roc_auc": 0.9150558423810694, |
|
"eval_runtime": 687.0743, |
|
"eval_samples_per_second": 4.183, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 15070 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.5723729993041058, |
|
"eval_f1_macro": 0.8197285299784532, |
|
"eval_f1_micro": 0.8613505337062617, |
|
"eval_loss": 0.0895121842622757, |
|
"eval_roc_auc": 0.9088388874230271, |
|
"eval_runtime": 688.6878, |
|
"eval_samples_per_second": 4.173, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 15344 |
|
}, |
|
{ |
|
"epoch": 56.57, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0883, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.5755045233124565, |
|
"eval_f1_macro": 0.8261680546876228, |
|
"eval_f1_micro": 0.8653240324032403, |
|
"eval_loss": 0.08795319497585297, |
|
"eval_roc_auc": 0.9159717957369441, |
|
"eval_runtime": 680.4805, |
|
"eval_samples_per_second": 4.223, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1e-05, |
|
"step": 15618 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.5782881002087683, |
|
"eval_f1_macro": 0.8227228870436498, |
|
"eval_f1_micro": 0.8639262127078114, |
|
"eval_loss": 0.08846761286258698, |
|
"eval_roc_auc": 0.9111322457907458, |
|
"eval_runtime": 678.456, |
|
"eval_samples_per_second": 4.236, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1e-05, |
|
"step": 15892 |
|
}, |
|
{ |
|
"epoch": 58.39, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0872, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5765483646485734, |
|
"eval_f1_macro": 0.8262742568594247, |
|
"eval_f1_micro": 0.8655003656409969, |
|
"eval_loss": 0.0878983661532402, |
|
"eval_roc_auc": 0.9160905401214736, |
|
"eval_runtime": 680.5904, |
|
"eval_samples_per_second": 4.223, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1e-05, |
|
"step": 16166 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5800278357689631, |
|
"eval_f1_macro": 0.8238378094426198, |
|
"eval_f1_micro": 0.8654139156932453, |
|
"eval_loss": 0.08844566345214844, |
|
"eval_roc_auc": 0.914969231409518, |
|
"eval_runtime": 682.0838, |
|
"eval_samples_per_second": 4.214, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1e-05, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 60.22, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0873, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.5744606819763396, |
|
"eval_f1_macro": 0.8265572971487117, |
|
"eval_f1_micro": 0.8651893408134642, |
|
"eval_loss": 0.0878659188747406, |
|
"eval_roc_auc": 0.9168337948077135, |
|
"eval_runtime": 683.217, |
|
"eval_samples_per_second": 4.207, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1e-05, |
|
"step": 16714 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.5765483646485734, |
|
"eval_f1_macro": 0.8251828516128455, |
|
"eval_f1_micro": 0.8649870071178397, |
|
"eval_loss": 0.08799029141664505, |
|
"eval_roc_auc": 0.9143652466938494, |
|
"eval_runtime": 680.2736, |
|
"eval_samples_per_second": 4.225, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1e-05, |
|
"step": 16988 |
|
}, |
|
{ |
|
"epoch": 62.04, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0864, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.5800278357689631, |
|
"eval_f1_macro": 0.8266891115852992, |
|
"eval_f1_micro": 0.8650424929178471, |
|
"eval_loss": 0.08828118443489075, |
|
"eval_roc_auc": 0.9134011927141672, |
|
"eval_runtime": 677.3735, |
|
"eval_samples_per_second": 4.243, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1e-05, |
|
"step": 17262 |
|
}, |
|
{ |
|
"epoch": 63.87, |
|
"learning_rate": 1e-05, |
|
"loss": 0.086, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.5782881002087683, |
|
"eval_f1_macro": 0.8256635970178378, |
|
"eval_f1_micro": 0.8667077889306342, |
|
"eval_loss": 0.08754145354032516, |
|
"eval_roc_auc": 0.9178472944451183, |
|
"eval_runtime": 682.5828, |
|
"eval_samples_per_second": 4.21, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1e-05, |
|
"step": 17536 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.58107167710508, |
|
"eval_f1_macro": 0.8277460823758025, |
|
"eval_f1_micro": 0.8669750648764526, |
|
"eval_loss": 0.08722905069589615, |
|
"eval_roc_auc": 0.9159442206991787, |
|
"eval_runtime": 673.5072, |
|
"eval_samples_per_second": 4.267, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 1e-05, |
|
"step": 17810 |
|
}, |
|
{ |
|
"epoch": 65.69, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0855, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.581767571329158, |
|
"eval_f1_macro": 0.8263083392061107, |
|
"eval_f1_micro": 0.8662405972512867, |
|
"eval_loss": 0.0872766524553299, |
|
"eval_roc_auc": 0.9146675753101624, |
|
"eval_runtime": 674.2325, |
|
"eval_samples_per_second": 4.263, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1e-05, |
|
"step": 18084 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.5796798886569241, |
|
"eval_f1_macro": 0.8236507380069967, |
|
"eval_f1_micro": 0.8647603888351997, |
|
"eval_loss": 0.08779256045818329, |
|
"eval_roc_auc": 0.9121142845321298, |
|
"eval_runtime": 672.7686, |
|
"eval_samples_per_second": 4.272, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 1e-05, |
|
"step": 18358 |
|
}, |
|
{ |
|
"epoch": 67.52, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0853, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.580723729993041, |
|
"eval_f1_macro": 0.82334160354742, |
|
"eval_f1_micro": 0.8644058136221144, |
|
"eval_loss": 0.08787883818149567, |
|
"eval_roc_auc": 0.9110366175644288, |
|
"eval_runtime": 678.5717, |
|
"eval_samples_per_second": 4.235, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1e-05, |
|
"step": 18632 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.5831593597773138, |
|
"eval_f1_macro": 0.8274164123414606, |
|
"eval_f1_micro": 0.8653988078342322, |
|
"eval_loss": 0.08730249851942062, |
|
"eval_roc_auc": 0.9129307238034322, |
|
"eval_runtime": 682.302, |
|
"eval_samples_per_second": 4.212, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1e-05, |
|
"step": 18906 |
|
}, |
|
{ |
|
"epoch": 69.34, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0854, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.58107167710508, |
|
"eval_f1_macro": 0.8286701109278063, |
|
"eval_f1_micro": 0.8661381908135155, |
|
"eval_loss": 0.08733326941728592, |
|
"eval_roc_auc": 0.9166425383550794, |
|
"eval_runtime": 673.3186, |
|
"eval_samples_per_second": 4.268, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 1e-05, |
|
"step": 19180 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.5779401530967293, |
|
"eval_f1_macro": 0.8262073521627441, |
|
"eval_f1_micro": 0.865708650324035, |
|
"eval_loss": 0.08731996268033981, |
|
"eval_roc_auc": 0.9155950369973136, |
|
"eval_runtime": 672.8744, |
|
"eval_samples_per_second": 4.271, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 1e-05, |
|
"step": 19454 |
|
}, |
|
{ |
|
"epoch": 71.17, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0847, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.5803757828810021, |
|
"eval_f1_macro": 0.8279492189021646, |
|
"eval_f1_micro": 0.8660418654245468, |
|
"eval_loss": 0.08729101717472076, |
|
"eval_roc_auc": 0.9172015860404081, |
|
"eval_runtime": 676.9231, |
|
"eval_samples_per_second": 4.246, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 19728 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0852, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.5765483646485734, |
|
"eval_f1_macro": 0.8258696329291023, |
|
"eval_f1_micro": 0.8661956034096008, |
|
"eval_loss": 0.08899407833814621, |
|
"eval_roc_auc": 0.917537916377082, |
|
"eval_runtime": 674.8648, |
|
"eval_samples_per_second": 4.259, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 20002 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.5835073068893528, |
|
"eval_f1_macro": 0.8266751443826955, |
|
"eval_f1_micro": 0.8663119764546072, |
|
"eval_loss": 0.08706125617027283, |
|
"eval_roc_auc": 0.9144583340958263, |
|
"eval_runtime": 676.3788, |
|
"eval_samples_per_second": 4.249, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 20276 |
|
}, |
|
{ |
|
"epoch": 74.82, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0845, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.5762004175365344, |
|
"eval_f1_macro": 0.8242525331164202, |
|
"eval_f1_micro": 0.8650994982806247, |
|
"eval_loss": 0.08718431740999222, |
|
"eval_roc_auc": 0.9151367489348123, |
|
"eval_runtime": 674.1856, |
|
"eval_samples_per_second": 4.263, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.5775922059846903, |
|
"eval_f1_macro": 0.8258404959868192, |
|
"eval_f1_micro": 0.8660362490149724, |
|
"eval_loss": 0.08712752908468246, |
|
"eval_roc_auc": 0.9161823322373652, |
|
"eval_runtime": 676.0536, |
|
"eval_samples_per_second": 4.251, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 20824 |
|
}, |
|
{ |
|
"epoch": 76.64, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0849, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.5779401530967293, |
|
"eval_f1_macro": 0.8262597281814207, |
|
"eval_f1_micro": 0.8654561858576745, |
|
"eval_loss": 0.08787967264652252, |
|
"eval_roc_auc": 0.915242017185023, |
|
"eval_runtime": 678.4216, |
|
"eval_samples_per_second": 4.236, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 21098 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.5779401530967293, |
|
"eval_f1_macro": 0.824064674812195, |
|
"eval_f1_micro": 0.8647364849581541, |
|
"eval_loss": 0.08832630515098572, |
|
"eval_roc_auc": 0.9138800063627106, |
|
"eval_runtime": 674.504, |
|
"eval_samples_per_second": 4.261, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 21372 |
|
}, |
|
{ |
|
"epoch": 78.47, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0853, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.580723729993041, |
|
"eval_f1_macro": 0.8283767069034536, |
|
"eval_f1_micro": 0.8667153859126425, |
|
"eval_loss": 0.08727473765611649, |
|
"eval_roc_auc": 0.9170071162464759, |
|
"eval_runtime": 680.1183, |
|
"eval_samples_per_second": 4.226, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 21646 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.581419624217119, |
|
"eval_f1_macro": 0.8257519474670673, |
|
"eval_f1_micro": 0.8654216185625353, |
|
"eval_loss": 0.08734780550003052, |
|
"eval_roc_auc": 0.9139968326920274, |
|
"eval_runtime": 682.9935, |
|
"eval_samples_per_second": 4.208, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 21920 |
|
}, |
|
{ |
|
"epoch": 80.29, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.0838, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.5828114126652749, |
|
"eval_f1_macro": 0.8261813753948223, |
|
"eval_f1_micro": 0.8653922514039366, |
|
"eval_loss": 0.08708538860082626, |
|
"eval_roc_auc": 0.9131951648411291, |
|
"eval_runtime": 690.614, |
|
"eval_samples_per_second": 4.162, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 22194 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.581767571329158, |
|
"eval_f1_macro": 0.8253000981325144, |
|
"eval_f1_micro": 0.866888801039137, |
|
"eval_loss": 0.08740255981683731, |
|
"eval_roc_auc": 0.9155308696670169, |
|
"eval_runtime": 680.0034, |
|
"eval_samples_per_second": 4.226, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 22468 |
|
}, |
|
{ |
|
"epoch": 82.12, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.0842, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.5845511482254697, |
|
"eval_f1_macro": 0.8282173993454429, |
|
"eval_f1_micro": 0.8666929710839298, |
|
"eval_loss": 0.08695908635854721, |
|
"eval_roc_auc": 0.9160732278767293, |
|
"eval_runtime": 685.2501, |
|
"eval_samples_per_second": 4.194, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 22742 |
|
}, |
|
{ |
|
"epoch": 83.94, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.0837, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.58107167710508, |
|
"eval_f1_macro": 0.8233437650206237, |
|
"eval_f1_micro": 0.8627316009866345, |
|
"eval_loss": 0.08810650557279587, |
|
"eval_roc_auc": 0.9079679208453217, |
|
"eval_runtime": 685.0438, |
|
"eval_samples_per_second": 4.195, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 23016 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.580723729993041, |
|
"eval_f1_macro": 0.8276925304690478, |
|
"eval_f1_micro": 0.8657459814353634, |
|
"eval_loss": 0.08707784116268158, |
|
"eval_roc_auc": 0.9141406112899818, |
|
"eval_runtime": 679.238, |
|
"eval_samples_per_second": 4.231, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 23290 |
|
}, |
|
{ |
|
"epoch": 85.77, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.0852, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.5835073068893528, |
|
"eval_f1_macro": 0.826547474740738, |
|
"eval_f1_micro": 0.865543367635418, |
|
"eval_loss": 0.08730652928352356, |
|
"eval_roc_auc": 0.9143716733764451, |
|
"eval_runtime": 682.0055, |
|
"eval_samples_per_second": 4.214, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 23564 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.5789839944328462, |
|
"eval_f1_macro": 0.8262746572799179, |
|
"eval_f1_micro": 0.8649230072463768, |
|
"eval_loss": 0.08727239817380905, |
|
"eval_roc_auc": 0.9136020891906951, |
|
"eval_runtime": 680.947, |
|
"eval_samples_per_second": 4.221, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 23838 |
|
}, |
|
{ |
|
"epoch": 87.59, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.084, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.5800278357689631, |
|
"eval_f1_macro": 0.826834019166277, |
|
"eval_f1_micro": 0.8667940647482015, |
|
"eval_loss": 0.08701465278863907, |
|
"eval_roc_auc": 0.9171091711553887, |
|
"eval_runtime": 684.253, |
|
"eval_samples_per_second": 4.2, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 24112 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.581419624217119, |
|
"eval_f1_macro": 0.8258806291830417, |
|
"eval_f1_micro": 0.8656834940442428, |
|
"eval_loss": 0.08738242834806442, |
|
"eval_roc_auc": 0.9133774537868273, |
|
"eval_runtime": 691.191, |
|
"eval_samples_per_second": 4.158, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 24386 |
|
}, |
|
{ |
|
"epoch": 89.42, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"loss": 0.0852, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.5821155184411969, |
|
"eval_f1_macro": 0.8300231463369614, |
|
"eval_f1_micro": 0.8671707482225482, |
|
"eval_loss": 0.08719626069068909, |
|
"eval_roc_auc": 0.915977599650412, |
|
"eval_runtime": 674.5704, |
|
"eval_samples_per_second": 4.26, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"step": 24660 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.5845511482254697, |
|
"eval_f1_macro": 0.8264026881090635, |
|
"eval_f1_micro": 0.8663228877429592, |
|
"eval_loss": 0.0870579332113266, |
|
"eval_roc_auc": 0.9140616725392814, |
|
"eval_runtime": 679.6794, |
|
"eval_samples_per_second": 4.228, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"step": 24934 |
|
}, |
|
{ |
|
"epoch": 91.24, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"loss": 0.0839, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.581767571329158, |
|
"eval_f1_macro": 0.8247546872604429, |
|
"eval_f1_micro": 0.8645293548203405, |
|
"eval_loss": 0.08783115446567535, |
|
"eval_roc_auc": 0.9114114582488465, |
|
"eval_runtime": 675.1186, |
|
"eval_samples_per_second": 4.257, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"step": 25208 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.5786360473208072, |
|
"eval_f1_macro": 0.8273023006171618, |
|
"eval_f1_micro": 0.8666216063985582, |
|
"eval_loss": 0.08800956606864929, |
|
"eval_roc_auc": 0.9162824690910654, |
|
"eval_runtime": 682.4811, |
|
"eval_samples_per_second": 4.211, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"step": 25482 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"step": 25482, |
|
"total_flos": 1.2085527749476667e+21, |
|
"train_loss": 0.009088765563437894, |
|
"train_runtime": 27917.3314, |
|
"train_samples_per_second": 34.532, |
|
"train_steps_per_second": 1.08 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 30140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 110, |
|
"save_steps": 500, |
|
"total_flos": 1.2085527749476667e+21, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|