{ "best_metric": 0.08695908635854721, "best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/dinov2-large-2024_01_24-with_data_aug_batch-size32_epochs85_freeze/checkpoint-22742", "epoch": 85.0, "eval_steps": 500, "global_step": 23290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.45894224077940154, "eval_f1_macro": 0.6395389989693074, "eval_f1_micro": 0.7737575503857426, "eval_loss": 0.13585977256298065, "eval_roc_auc": 0.8471240403763409, "eval_runtime": 675.8068, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 274 }, { "epoch": 1.82, "learning_rate": 0.001, "loss": 0.2459, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.4940848990953375, "eval_f1_macro": 0.7304998296932924, "eval_f1_micro": 0.8032231694499591, "eval_loss": 0.12362784147262573, "eval_roc_auc": 0.8697341470820456, "eval_runtime": 678.2974, "eval_samples_per_second": 4.237, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 548 }, { "epoch": 3.0, "eval_accuracy": 0.5125260960334029, "eval_f1_macro": 0.7426440054746392, "eval_f1_micro": 0.8174202432866652, "eval_loss": 0.11671263724565506, "eval_roc_auc": 0.8827824537503088, "eval_runtime": 674.2849, "eval_samples_per_second": 4.262, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 822 }, { "epoch": 3.65, "learning_rate": 0.001, "loss": 0.1403, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5100904662491301, "eval_f1_macro": 0.7481206268648029, "eval_f1_micro": 0.817623068527773, "eval_loss": 0.11555441468954086, "eval_roc_auc": 0.8825597364016536, "eval_runtime": 684.1218, "eval_samples_per_second": 4.201, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 1096 }, { "epoch": 5.0, "eval_accuracy": 0.5243562978427279, "eval_f1_macro": 0.7614020034586013, "eval_f1_micro": 0.8267689489351958, "eval_loss": 0.11359219998121262, "eval_roc_auc": 0.8886760312325277, "eval_runtime": 674.0166, "eval_samples_per_second": 4.264, "eval_steps_per_second": 0.134, "learning_rate": 0.001, "step": 1370 }, { "epoch": 5.47, "learning_rate": 0.001, "loss": 0.1313, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.5219206680584552, "eval_f1_macro": 0.7508698006051816, "eval_f1_micro": 0.8210489222998767, "eval_loss": 0.11100047826766968, "eval_roc_auc": 0.877677266975988, "eval_runtime": 676.1, "eval_samples_per_second": 4.251, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 1644 }, { "epoch": 7.0, "eval_accuracy": 0.5323590814196242, "eval_f1_macro": 0.7613673312506429, "eval_f1_micro": 0.8288991092740292, "eval_loss": 0.10846547037363052, "eval_roc_auc": 0.8846228046955259, "eval_runtime": 682.0096, "eval_samples_per_second": 4.214, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 1918 }, { "epoch": 7.3, "learning_rate": 0.001, "loss": 0.1289, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.5379262352122477, "eval_f1_macro": 0.7711215001442554, "eval_f1_micro": 0.8331729408434757, "eval_loss": 0.11005302518606186, "eval_roc_auc": 0.8958012673255937, "eval_runtime": 682.26, "eval_samples_per_second": 4.212, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 2192 }, { "epoch": 9.0, "eval_accuracy": 0.5139178844815588, "eval_f1_macro": 0.7669688558128348, "eval_f1_micro": 0.8271255519076193, "eval_loss": 0.11129175871610641, "eval_roc_auc": 0.8924250608458335, "eval_runtime": 683.3423, "eval_samples_per_second": 4.206, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 2466 }, { "epoch": 9.12, "learning_rate": 0.001, "loss": 0.1268, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.5313152400835073, "eval_f1_macro": 0.7610925982620881, "eval_f1_micro": 0.8258011503697616, "eval_loss": 0.11381296068429947, "eval_roc_auc": 0.880444980112697, "eval_runtime": 679.9943, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 2740 }, { "epoch": 10.95, "learning_rate": 0.001, "loss": 0.1255, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.5260960334029228, "eval_f1_macro": 0.762697586166308, "eval_f1_micro": 0.8262265016047684, "eval_loss": 0.11390296369791031, "eval_roc_auc": 0.8880168466934987, "eval_runtime": 678.1509, "eval_samples_per_second": 4.238, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 3014 }, { "epoch": 12.0, "eval_accuracy": 0.5337508698677801, "eval_f1_macro": 0.7573087365131856, "eval_f1_micro": 0.8210012500744092, "eval_loss": 0.11208122968673706, "eval_roc_auc": 0.8736066784464123, "eval_runtime": 680.166, "eval_samples_per_second": 4.225, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 3288 }, { "epoch": 12.77, "learning_rate": 0.001, "loss": 0.1253, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.5219206680584552, "eval_f1_macro": 0.7489136029171714, "eval_f1_micro": 0.8207366032466399, "eval_loss": 0.1110881045460701, "eval_roc_auc": 0.8803454162802951, "eval_runtime": 682.0648, "eval_samples_per_second": 4.214, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 3562 }, { "epoch": 14.0, "eval_accuracy": 0.5400139178844816, "eval_f1_macro": 0.7776741330298375, "eval_f1_micro": 0.8408186469584993, "eval_loss": 0.10247301310300827, "eval_roc_auc": 0.8987147268632997, "eval_runtime": 676.5367, "eval_samples_per_second": 4.248, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 3836 }, { "epoch": 14.6, "learning_rate": 0.0001, "loss": 0.1171, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.5403618649965205, "eval_f1_macro": 0.7795139529876273, "eval_f1_micro": 0.842865329512894, "eval_loss": 0.0998576357960701, "eval_roc_auc": 0.897277663148542, "eval_runtime": 675.6889, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 4110 }, { "epoch": 16.0, "eval_accuracy": 0.5407098121085595, "eval_f1_macro": 0.7861162275453341, "eval_f1_micro": 0.8462626605556499, "eval_loss": 0.10081179440021515, "eval_roc_auc": 0.9032963122022265, "eval_runtime": 680.4113, "eval_samples_per_second": 4.224, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 4384 }, { "epoch": 16.42, "learning_rate": 0.0001, "loss": 0.1107, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.545929018789144, "eval_f1_macro": 0.7877890037679841, "eval_f1_micro": 0.8474232610532244, "eval_loss": 0.10136950016021729, "eval_roc_auc": 0.9054715489545434, "eval_runtime": 689.6336, "eval_samples_per_second": 4.167, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 4658 }, { "epoch": 18.0, "eval_accuracy": 0.5480167014613778, "eval_f1_macro": 0.7867996984352024, "eval_f1_micro": 0.8471123755334281, "eval_loss": 0.09731467068195343, "eval_roc_auc": 0.9019535814277009, "eval_runtime": 689.7429, "eval_samples_per_second": 4.167, "eval_steps_per_second": 0.13, "learning_rate": 0.0001, "step": 4932 }, { "epoch": 18.25, "learning_rate": 0.0001, "loss": 0.1078, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.5480167014613778, "eval_f1_macro": 0.789354289479613, "eval_f1_micro": 0.849087519068874, "eval_loss": 0.09738590568304062, "eval_roc_auc": 0.9053669532212902, "eval_runtime": 687.0367, "eval_samples_per_second": 4.183, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 5206 }, { "epoch": 20.0, "eval_accuracy": 0.5549756437021572, "eval_f1_macro": 0.7947863154349663, "eval_f1_micro": 0.8497521508745941, "eval_loss": 0.0971071869134903, "eval_roc_auc": 0.9029799344302967, "eval_runtime": 693.4393, "eval_samples_per_second": 4.145, "eval_steps_per_second": 0.13, "learning_rate": 0.0001, "step": 5480 }, { "epoch": 20.07, "learning_rate": 0.0001, "loss": 0.1061, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.5532359081419624, "eval_f1_macro": 0.793994619616555, "eval_f1_micro": 0.850910726332359, "eval_loss": 0.09643097966909409, "eval_roc_auc": 0.908055677859469, "eval_runtime": 689.9756, "eval_samples_per_second": 4.165, "eval_steps_per_second": 0.13, "learning_rate": 0.0001, "step": 5754 }, { "epoch": 21.9, "learning_rate": 0.0001, "loss": 0.1048, "step": 6000 }, { "epoch": 22.0, "eval_accuracy": 0.5563674321503131, "eval_f1_macro": 0.7973736665550476, "eval_f1_micro": 0.8519603424966201, "eval_loss": 0.096234992146492, "eval_roc_auc": 0.9079748210535556, "eval_runtime": 688.8118, "eval_samples_per_second": 4.172, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 6028 }, { "epoch": 23.0, "eval_accuracy": 0.558455114822547, "eval_f1_macro": 0.7969454250638132, "eval_f1_micro": 0.8504731861198739, "eval_loss": 0.09601961821317673, "eval_roc_auc": 0.9012155078858011, "eval_runtime": 688.0026, "eval_samples_per_second": 4.177, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 6302 }, { "epoch": 23.72, "learning_rate": 0.0001, "loss": 0.1038, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.5626304801670147, "eval_f1_macro": 0.7974458635640262, "eval_f1_micro": 0.8510467909850132, "eval_loss": 0.09510745108127594, "eval_roc_auc": 0.9024119192380319, "eval_runtime": 688.423, "eval_samples_per_second": 4.175, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 6576 }, { "epoch": 25.0, "eval_accuracy": 0.5643702157272095, "eval_f1_macro": 0.795289513465328, "eval_f1_micro": 0.8511713367018835, "eval_loss": 0.0944407731294632, "eval_roc_auc": 0.9012469687818218, "eval_runtime": 683.8812, "eval_samples_per_second": 4.202, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 6850 }, { "epoch": 25.55, "learning_rate": 0.0001, "loss": 0.1017, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.5640222686151705, "eval_f1_macro": 0.8036711965439244, "eval_f1_micro": 0.8572393605043909, "eval_loss": 0.0948282852768898, "eval_roc_auc": 0.9111790013806387, "eval_runtime": 681.6858, "eval_samples_per_second": 4.216, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 7124 }, { "epoch": 27.0, "eval_accuracy": 0.5636743215031316, "eval_f1_macro": 0.8034638180358344, "eval_f1_micro": 0.8551240743881069, "eval_loss": 0.09229259192943573, "eval_roc_auc": 0.9086109391822021, "eval_runtime": 683.6776, "eval_samples_per_second": 4.204, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 7398 }, { "epoch": 27.37, "learning_rate": 0.0001, "loss": 0.1008, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.5643702157272095, "eval_f1_macro": 0.8072611584992022, "eval_f1_micro": 0.8561391580259505, "eval_loss": 0.0919216200709343, "eval_roc_auc": 0.9083895171196321, "eval_runtime": 676.936, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 7672 }, { "epoch": 29.0, "eval_accuracy": 0.5681976339596382, "eval_f1_macro": 0.807775544791943, "eval_f1_micro": 0.8571590844550463, "eval_loss": 0.09229801595211029, "eval_roc_auc": 0.9081680950570622, "eval_runtime": 680.3447, "eval_samples_per_second": 4.224, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 7946 }, { "epoch": 29.2, "learning_rate": 0.0001, "loss": 0.1006, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.5636743215031316, "eval_f1_macro": 0.8078629475879894, "eval_f1_micro": 0.8560661454525001, "eval_loss": 0.09243426471948624, "eval_roc_auc": 0.9107996520688381, "eval_runtime": 679.1764, "eval_samples_per_second": 4.232, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 8220 }, { "epoch": 31.0, "eval_accuracy": 0.5688935281837161, "eval_f1_macro": 0.8043753783436429, "eval_f1_micro": 0.8549068890666057, "eval_loss": 0.09250637888908386, "eval_roc_auc": 0.9050076062220636, "eval_runtime": 675.7031, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 8494 }, { "epoch": 31.02, "learning_rate": 0.0001, "loss": 0.0987, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.5678496868475992, "eval_f1_macro": 0.8071226305218325, "eval_f1_micro": 0.858236685057989, "eval_loss": 0.09133294969797134, "eval_roc_auc": 0.9117040473456065, "eval_runtime": 677.7385, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 8768 }, { "epoch": 32.85, "learning_rate": 0.0001, "loss": 0.0983, "step": 9000 }, { "epoch": 33.0, "eval_accuracy": 0.5692414752957551, "eval_f1_macro": 0.8081519622072744, "eval_f1_micro": 0.8570938803496942, "eval_loss": 0.09114891290664673, "eval_roc_auc": 0.9061295874509765, "eval_runtime": 681.1845, "eval_samples_per_second": 4.219, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 9042 }, { "epoch": 34.0, "eval_accuracy": 0.5709812108559499, "eval_f1_macro": 0.8059984375887345, "eval_f1_micro": 0.8570447522032734, "eval_loss": 0.09058225899934769, "eval_roc_auc": 0.9055923606377748, "eval_runtime": 681.0802, "eval_samples_per_second": 4.22, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 9316 }, { "epoch": 34.67, "learning_rate": 0.0001, "loss": 0.0967, "step": 9500 }, { "epoch": 35.0, "eval_accuracy": 0.5692414752957551, "eval_f1_macro": 0.8103551770491668, "eval_f1_micro": 0.857759845428198, "eval_loss": 0.09091359376907349, "eval_roc_auc": 0.9083150869963146, "eval_runtime": 683.7099, "eval_samples_per_second": 4.204, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 9590 }, { "epoch": 36.0, "eval_accuracy": 0.5748086290883786, "eval_f1_macro": 0.8114188986781382, "eval_f1_micro": 0.8582166040314315, "eval_loss": 0.09166968613862991, "eval_roc_auc": 0.9079081626467485, "eval_runtime": 677.0062, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 9864 }, { "epoch": 36.5, "learning_rate": 0.0001, "loss": 0.0963, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.5741127348643006, "eval_f1_macro": 0.8104359485439742, "eval_f1_micro": 0.8571918983865431, "eval_loss": 0.09075025469064713, "eval_roc_auc": 0.9057496153700481, "eval_runtime": 682.1714, "eval_samples_per_second": 4.213, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 10138 }, { "epoch": 38.0, "eval_accuracy": 0.5709812108559499, "eval_f1_macro": 0.8135949001200257, "eval_f1_micro": 0.8594423033325777, "eval_loss": 0.09104561805725098, "eval_roc_auc": 0.9101469439602342, "eval_runtime": 690.1946, "eval_samples_per_second": 4.164, "eval_steps_per_second": 0.13, "learning_rate": 0.0001, "step": 10412 }, { "epoch": 38.32, "learning_rate": 0.0001, "loss": 0.0957, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.5685455810716771, "eval_f1_macro": 0.808520223441343, "eval_f1_micro": 0.8577247270464444, "eval_loss": 0.09074629843235016, "eval_roc_auc": 0.9098080230513902, "eval_runtime": 678.1058, "eval_samples_per_second": 4.238, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 10686 }, { "epoch": 40.0, "eval_accuracy": 0.5730688935281837, "eval_f1_macro": 0.8111504469893477, "eval_f1_micro": 0.8592332123411979, "eval_loss": 0.09030281752347946, "eval_roc_auc": 0.909802268885752, "eval_runtime": 695.8681, "eval_samples_per_second": 4.13, "eval_steps_per_second": 0.129, "learning_rate": 0.0001, "step": 10960 }, { "epoch": 40.15, "learning_rate": 0.0001, "loss": 0.0953, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.5716771050800278, "eval_f1_macro": 0.8133805742659422, "eval_f1_micro": 0.8586208856801775, "eval_loss": 0.09064245969057083, "eval_roc_auc": 0.9086828782290092, "eval_runtime": 687.8411, "eval_samples_per_second": 4.178, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 11234 }, { "epoch": 41.97, "learning_rate": 0.0001, "loss": 0.0943, "step": 11500 }, { "epoch": 42.0, "eval_accuracy": 0.5664578983994433, "eval_f1_macro": 0.8135815799291138, "eval_f1_micro": 0.8584246692032484, "eval_loss": 0.09031981229782104, "eval_roc_auc": 0.9089139403154726, "eval_runtime": 684.2332, "eval_samples_per_second": 4.2, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 11508 }, { "epoch": 43.0, "eval_accuracy": 0.569937369519833, "eval_f1_macro": 0.8177715667121555, "eval_f1_micro": 0.8603735373537355, "eval_loss": 0.09048929065465927, "eval_roc_auc": 0.9131758350455123, "eval_runtime": 683.871, "eval_samples_per_second": 4.203, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 11782 }, { "epoch": 43.8, "learning_rate": 0.0001, "loss": 0.0947, "step": 12000 }, { "epoch": 44.0, "eval_accuracy": 0.5727209464161448, "eval_f1_macro": 0.8149031816603105, "eval_f1_micro": 0.8585443759981747, "eval_loss": 0.090988889336586, "eval_roc_auc": 0.9075230591693096, "eval_runtime": 686.4073, "eval_samples_per_second": 4.187, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 12056 }, { "epoch": 45.0, "eval_accuracy": 0.5727209464161448, "eval_f1_macro": 0.8112945515986235, "eval_f1_micro": 0.8590971272229823, "eval_loss": 0.09051001071929932, "eval_roc_auc": 0.9080583679272985, "eval_runtime": 690.3088, "eval_samples_per_second": 4.163, "eval_steps_per_second": 0.13, "learning_rate": 0.0001, "step": 12330 }, { "epoch": 45.62, "learning_rate": 0.0001, "loss": 0.0925, "step": 12500 }, { "epoch": 46.0, "eval_accuracy": 0.5727209464161448, "eval_f1_macro": 0.8138956921603455, "eval_f1_micro": 0.8608370193943518, "eval_loss": 0.08959119021892548, "eval_roc_auc": 0.9107387478276688, "eval_runtime": 684.5538, "eval_samples_per_second": 4.198, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 12604 }, { "epoch": 47.0, "eval_accuracy": 0.5744606819763396, "eval_f1_macro": 0.8154159530277365, "eval_f1_micro": 0.8598835217540253, "eval_loss": 0.08953865617513657, "eval_roc_auc": 0.9079274426945352, "eval_runtime": 681.6068, "eval_samples_per_second": 4.217, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 12878 }, { "epoch": 47.45, "learning_rate": 0.0001, "loss": 0.0928, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.5744606819763396, "eval_f1_macro": 0.8154966869589858, "eval_f1_micro": 0.8605536922289807, "eval_loss": 0.08962185680866241, "eval_roc_auc": 0.9097631357688805, "eval_runtime": 684.997, "eval_samples_per_second": 4.196, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 13152 }, { "epoch": 49.0, "eval_accuracy": 0.5727209464161448, "eval_f1_macro": 0.8168754926591527, "eval_f1_micro": 0.8606169781580725, "eval_loss": 0.08909053355455399, "eval_roc_auc": 0.9130853382157057, "eval_runtime": 683.2092, "eval_samples_per_second": 4.207, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 13426 }, { "epoch": 49.27, "learning_rate": 0.0001, "loss": 0.0914, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.5734168406402227, "eval_f1_macro": 0.8182687784925751, "eval_f1_micro": 0.8616618652205841, "eval_loss": 0.08951092511415482, "eval_roc_auc": 0.9125141096821429, "eval_runtime": 683.8641, "eval_samples_per_second": 4.203, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 13700 }, { "epoch": 51.0, "eval_accuracy": 0.5668058455114823, "eval_f1_macro": 0.8184177894108883, "eval_f1_micro": 0.8608232987958555, "eval_loss": 0.09029122442007065, "eval_roc_auc": 0.914931294083072, "eval_runtime": 685.4274, "eval_samples_per_second": 4.193, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 13974 }, { "epoch": 51.09, "learning_rate": 0.0001, "loss": 0.0919, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.5762004175365344, "eval_f1_macro": 0.8172163352414866, "eval_f1_micro": 0.8617045454545454, "eval_loss": 0.09041330218315125, "eval_roc_auc": 0.9105776569849702, "eval_runtime": 686.3022, "eval_samples_per_second": 4.188, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 14248 }, { "epoch": 52.92, "learning_rate": 0.0001, "loss": 0.091, "step": 14500 }, { "epoch": 53.0, "eval_accuracy": 0.5734168406402227, "eval_f1_macro": 0.8154347454270638, "eval_f1_micro": 0.8604036655984708, "eval_loss": 0.09106075763702393, "eval_roc_auc": 0.913401765735465, "eval_runtime": 686.9936, "eval_samples_per_second": 4.183, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 14522 }, { "epoch": 54.0, "eval_accuracy": 0.5751565762004175, "eval_f1_macro": 0.822392587875712, "eval_f1_micro": 0.8628963639457711, "eval_loss": 0.09085189551115036, "eval_roc_auc": 0.9117971844954131, "eval_runtime": 691.549, "eval_samples_per_second": 4.156, "eval_steps_per_second": 0.13, "learning_rate": 0.0001, "step": 14796 }, { "epoch": 54.74, "learning_rate": 0.0001, "loss": 0.0907, "step": 15000 }, { "epoch": 55.0, "eval_accuracy": 0.5720250521920668, "eval_f1_macro": 0.8246722143238872, "eval_f1_micro": 0.862824401752612, "eval_loss": 0.0893503949046135, "eval_roc_auc": 0.9150558423810694, "eval_runtime": 687.0743, "eval_samples_per_second": 4.183, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 15070 }, { "epoch": 56.0, "eval_accuracy": 0.5723729993041058, "eval_f1_macro": 0.8197285299784532, "eval_f1_micro": 0.8613505337062617, "eval_loss": 0.0895121842622757, "eval_roc_auc": 0.9088388874230271, "eval_runtime": 688.6878, "eval_samples_per_second": 4.173, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 15344 }, { "epoch": 56.57, "learning_rate": 1e-05, "loss": 0.0883, "step": 15500 }, { "epoch": 57.0, "eval_accuracy": 0.5755045233124565, "eval_f1_macro": 0.8261680546876228, "eval_f1_micro": 0.8653240324032403, "eval_loss": 0.08795319497585297, "eval_roc_auc": 0.9159717957369441, "eval_runtime": 680.4805, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.132, "learning_rate": 1e-05, "step": 15618 }, { "epoch": 58.0, "eval_accuracy": 0.5782881002087683, "eval_f1_macro": 0.8227228870436498, "eval_f1_micro": 0.8639262127078114, "eval_loss": 0.08846761286258698, "eval_roc_auc": 0.9111322457907458, "eval_runtime": 678.456, "eval_samples_per_second": 4.236, "eval_steps_per_second": 0.133, "learning_rate": 1e-05, "step": 15892 }, { "epoch": 58.39, "learning_rate": 1e-05, "loss": 0.0872, "step": 16000 }, { "epoch": 59.0, "eval_accuracy": 0.5765483646485734, "eval_f1_macro": 0.8262742568594247, "eval_f1_micro": 0.8655003656409969, "eval_loss": 0.0878983661532402, "eval_roc_auc": 0.9160905401214736, "eval_runtime": 680.5904, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.132, "learning_rate": 1e-05, "step": 16166 }, { "epoch": 60.0, "eval_accuracy": 0.5800278357689631, "eval_f1_macro": 0.8238378094426198, "eval_f1_micro": 0.8654139156932453, "eval_loss": 0.08844566345214844, "eval_roc_auc": 0.914969231409518, "eval_runtime": 682.0838, "eval_samples_per_second": 4.214, "eval_steps_per_second": 0.132, "learning_rate": 1e-05, "step": 16440 }, { "epoch": 60.22, "learning_rate": 1e-05, "loss": 0.0873, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.5744606819763396, "eval_f1_macro": 0.8265572971487117, "eval_f1_micro": 0.8651893408134642, "eval_loss": 0.0878659188747406, "eval_roc_auc": 0.9168337948077135, "eval_runtime": 683.217, "eval_samples_per_second": 4.207, "eval_steps_per_second": 0.132, "learning_rate": 1e-05, "step": 16714 }, { "epoch": 62.0, "eval_accuracy": 0.5765483646485734, "eval_f1_macro": 0.8251828516128455, "eval_f1_micro": 0.8649870071178397, "eval_loss": 0.08799029141664505, "eval_roc_auc": 0.9143652466938494, "eval_runtime": 680.2736, "eval_samples_per_second": 4.225, "eval_steps_per_second": 0.132, "learning_rate": 1e-05, "step": 16988 }, { "epoch": 62.04, "learning_rate": 1e-05, "loss": 0.0864, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.5800278357689631, "eval_f1_macro": 0.8266891115852992, "eval_f1_micro": 0.8650424929178471, "eval_loss": 0.08828118443489075, "eval_roc_auc": 0.9134011927141672, "eval_runtime": 677.3735, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.133, "learning_rate": 1e-05, "step": 17262 }, { "epoch": 63.87, "learning_rate": 1e-05, "loss": 0.086, "step": 17500 }, { "epoch": 64.0, "eval_accuracy": 0.5782881002087683, "eval_f1_macro": 0.8256635970178378, "eval_f1_micro": 0.8667077889306342, "eval_loss": 0.08754145354032516, "eval_roc_auc": 0.9178472944451183, "eval_runtime": 682.5828, "eval_samples_per_second": 4.21, "eval_steps_per_second": 0.132, "learning_rate": 1e-05, "step": 17536 }, { "epoch": 65.0, "eval_accuracy": 0.58107167710508, "eval_f1_macro": 0.8277460823758025, "eval_f1_micro": 0.8669750648764526, "eval_loss": 0.08722905069589615, "eval_roc_auc": 0.9159442206991787, "eval_runtime": 673.5072, "eval_samples_per_second": 4.267, "eval_steps_per_second": 0.134, "learning_rate": 1e-05, "step": 17810 }, { "epoch": 65.69, "learning_rate": 1e-05, "loss": 0.0855, "step": 18000 }, { "epoch": 66.0, "eval_accuracy": 0.581767571329158, "eval_f1_macro": 0.8263083392061107, "eval_f1_micro": 0.8662405972512867, "eval_loss": 0.0872766524553299, "eval_roc_auc": 0.9146675753101624, "eval_runtime": 674.2325, "eval_samples_per_second": 4.263, "eval_steps_per_second": 0.133, "learning_rate": 1e-05, "step": 18084 }, { "epoch": 67.0, "eval_accuracy": 0.5796798886569241, "eval_f1_macro": 0.8236507380069967, "eval_f1_micro": 0.8647603888351997, "eval_loss": 0.08779256045818329, "eval_roc_auc": 0.9121142845321298, "eval_runtime": 672.7686, "eval_samples_per_second": 4.272, "eval_steps_per_second": 0.134, "learning_rate": 1e-05, "step": 18358 }, { "epoch": 67.52, "learning_rate": 1e-05, "loss": 0.0853, "step": 18500 }, { "epoch": 68.0, "eval_accuracy": 0.580723729993041, "eval_f1_macro": 0.82334160354742, "eval_f1_micro": 0.8644058136221144, "eval_loss": 0.08787883818149567, "eval_roc_auc": 0.9110366175644288, "eval_runtime": 678.5717, "eval_samples_per_second": 4.235, "eval_steps_per_second": 0.133, "learning_rate": 1e-05, "step": 18632 }, { "epoch": 69.0, "eval_accuracy": 0.5831593597773138, "eval_f1_macro": 0.8274164123414606, "eval_f1_micro": 0.8653988078342322, "eval_loss": 0.08730249851942062, "eval_roc_auc": 0.9129307238034322, "eval_runtime": 682.302, "eval_samples_per_second": 4.212, "eval_steps_per_second": 0.132, "learning_rate": 1e-05, "step": 18906 }, { "epoch": 69.34, "learning_rate": 1e-05, "loss": 0.0854, "step": 19000 }, { "epoch": 70.0, "eval_accuracy": 0.58107167710508, "eval_f1_macro": 0.8286701109278063, "eval_f1_micro": 0.8661381908135155, "eval_loss": 0.08733326941728592, "eval_roc_auc": 0.9166425383550794, "eval_runtime": 673.3186, "eval_samples_per_second": 4.268, "eval_steps_per_second": 0.134, "learning_rate": 1e-05, "step": 19180 }, { "epoch": 71.0, "eval_accuracy": 0.5779401530967293, "eval_f1_macro": 0.8262073521627441, "eval_f1_micro": 0.865708650324035, "eval_loss": 0.08731996268033981, "eval_roc_auc": 0.9155950369973136, "eval_runtime": 672.8744, "eval_samples_per_second": 4.271, "eval_steps_per_second": 0.134, "learning_rate": 1e-05, "step": 19454 }, { "epoch": 71.17, "learning_rate": 1.0000000000000002e-06, "loss": 0.0847, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.5803757828810021, "eval_f1_macro": 0.8279492189021646, "eval_f1_micro": 0.8660418654245468, "eval_loss": 0.08729101717472076, "eval_roc_auc": 0.9172015860404081, "eval_runtime": 676.9231, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 19728 }, { "epoch": 72.99, "learning_rate": 1.0000000000000002e-06, "loss": 0.0852, "step": 20000 }, { "epoch": 73.0, "eval_accuracy": 0.5765483646485734, "eval_f1_macro": 0.8258696329291023, "eval_f1_micro": 0.8661956034096008, "eval_loss": 0.08899407833814621, "eval_roc_auc": 0.917537916377082, "eval_runtime": 674.8648, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 20002 }, { "epoch": 74.0, "eval_accuracy": 0.5835073068893528, "eval_f1_macro": 0.8266751443826955, "eval_f1_micro": 0.8663119764546072, "eval_loss": 0.08706125617027283, "eval_roc_auc": 0.9144583340958263, "eval_runtime": 676.3788, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 20276 }, { "epoch": 74.82, "learning_rate": 1.0000000000000002e-06, "loss": 0.0845, "step": 20500 }, { "epoch": 75.0, "eval_accuracy": 0.5762004175365344, "eval_f1_macro": 0.8242525331164202, "eval_f1_micro": 0.8650994982806247, "eval_loss": 0.08718431740999222, "eval_roc_auc": 0.9151367489348123, "eval_runtime": 674.1856, "eval_samples_per_second": 4.263, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 20550 }, { "epoch": 76.0, "eval_accuracy": 0.5775922059846903, "eval_f1_macro": 0.8258404959868192, "eval_f1_micro": 0.8660362490149724, "eval_loss": 0.08712752908468246, "eval_roc_auc": 0.9161823322373652, "eval_runtime": 676.0536, "eval_samples_per_second": 4.251, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 20824 }, { "epoch": 76.64, "learning_rate": 1.0000000000000002e-06, "loss": 0.0849, "step": 21000 }, { "epoch": 77.0, "eval_accuracy": 0.5779401530967293, "eval_f1_macro": 0.8262597281814207, "eval_f1_micro": 0.8654561858576745, "eval_loss": 0.08787967264652252, "eval_roc_auc": 0.915242017185023, "eval_runtime": 678.4216, "eval_samples_per_second": 4.236, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 21098 }, { "epoch": 78.0, "eval_accuracy": 0.5779401530967293, "eval_f1_macro": 0.824064674812195, "eval_f1_micro": 0.8647364849581541, "eval_loss": 0.08832630515098572, "eval_roc_auc": 0.9138800063627106, "eval_runtime": 674.504, "eval_samples_per_second": 4.261, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 21372 }, { "epoch": 78.47, "learning_rate": 1.0000000000000002e-06, "loss": 0.0853, "step": 21500 }, { "epoch": 79.0, "eval_accuracy": 0.580723729993041, "eval_f1_macro": 0.8283767069034536, "eval_f1_micro": 0.8667153859126425, "eval_loss": 0.08727473765611649, "eval_roc_auc": 0.9170071162464759, "eval_runtime": 680.1183, "eval_samples_per_second": 4.226, "eval_steps_per_second": 0.132, "learning_rate": 1.0000000000000002e-06, "step": 21646 }, { "epoch": 80.0, "eval_accuracy": 0.581419624217119, "eval_f1_macro": 0.8257519474670673, "eval_f1_micro": 0.8654216185625353, "eval_loss": 0.08734780550003052, "eval_roc_auc": 0.9139968326920274, "eval_runtime": 682.9935, "eval_samples_per_second": 4.208, "eval_steps_per_second": 0.132, "learning_rate": 1.0000000000000002e-06, "step": 21920 }, { "epoch": 80.29, "learning_rate": 1.0000000000000002e-07, "loss": 0.0838, "step": 22000 }, { "epoch": 81.0, "eval_accuracy": 0.5828114126652749, "eval_f1_macro": 0.8261813753948223, "eval_f1_micro": 0.8653922514039366, "eval_loss": 0.08708538860082626, "eval_roc_auc": 0.9131951648411291, "eval_runtime": 690.614, "eval_samples_per_second": 4.162, "eval_steps_per_second": 0.13, "learning_rate": 1.0000000000000002e-07, "step": 22194 }, { "epoch": 82.0, "eval_accuracy": 0.581767571329158, "eval_f1_macro": 0.8253000981325144, "eval_f1_micro": 0.866888801039137, "eval_loss": 0.08740255981683731, "eval_roc_auc": 0.9155308696670169, "eval_runtime": 680.0034, "eval_samples_per_second": 4.226, "eval_steps_per_second": 0.132, "learning_rate": 1.0000000000000002e-07, "step": 22468 }, { "epoch": 82.12, "learning_rate": 1.0000000000000002e-07, "loss": 0.0842, "step": 22500 }, { "epoch": 83.0, "eval_accuracy": 0.5845511482254697, "eval_f1_macro": 0.8282173993454429, "eval_f1_micro": 0.8666929710839298, "eval_loss": 0.08695908635854721, "eval_roc_auc": 0.9160732278767293, "eval_runtime": 685.2501, "eval_samples_per_second": 4.194, "eval_steps_per_second": 0.131, "learning_rate": 1.0000000000000002e-07, "step": 22742 }, { "epoch": 83.94, "learning_rate": 1.0000000000000002e-07, "loss": 0.0837, "step": 23000 }, { "epoch": 84.0, "eval_accuracy": 0.58107167710508, "eval_f1_macro": 0.8233437650206237, "eval_f1_micro": 0.8627316009866345, "eval_loss": 0.08810650557279587, "eval_roc_auc": 0.9079679208453217, "eval_runtime": 687.4756, "eval_samples_per_second": 4.181, "eval_steps_per_second": 0.131, "learning_rate": 1.0000000000000002e-07, "step": 23016 }, { "epoch": 85.0, "eval_accuracy": 0.580723729993041, "eval_f1_macro": 0.8276925304690478, "eval_f1_micro": 0.8657459814353634, "eval_loss": 0.08707784116268158, "eval_roc_auc": 0.9141406112899818, "eval_runtime": 688.8064, "eval_samples_per_second": 4.172, "eval_steps_per_second": 0.131, "learning_rate": 1.0000000000000002e-07, "step": 23290 }, { "epoch": 85.0, "learning_rate": 1.0000000000000002e-07, "step": 23290, "total_flos": 1.1045912459199104e+21, "train_loss": 0.0019856175725272715, "train_runtime": 5622.9029, "train_samples_per_second": 132.483, "train_steps_per_second": 4.142 } ], "logging_steps": 500, "max_steps": 23290, "num_input_tokens_seen": 0, "num_train_epochs": 85, "save_steps": 500, "total_flos": 1.1045912459199104e+21, "train_batch_size": 32, "trial_name": null, "trial_params": null }