{ "best_metric": 0.7370102490601179, "best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-4/checkpoint-5112", "epoch": 36.255319148936174, "global_step": 5112, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.01, "eval_loss": 0.3023780882358551, "eval_macro_f1": 0.04276761517615176, "eval_macro_precision": 0.03250514933058703, "eval_macro_recall": 0.0625, "eval_micro_f1": 0.4057854560064283, "eval_micro_precision": 0.5200823892893924, "eval_micro_recall": 0.3326745718050066, "eval_runtime": 2.6706, "eval_samples_per_second": 363.595, "eval_steps_per_second": 22.842, "step": 142 }, { "epoch": 2.01, "eval_loss": 0.2355797439813614, "eval_macro_f1": 0.0478700416730977, "eval_macro_precision": 0.10847007722007722, "eval_macro_recall": 0.04781835896355593, "eval_micro_f1": 0.3766552231486022, "eval_micro_precision": 0.7370441458733206, "eval_micro_recall": 0.25296442687747034, "eval_runtime": 2.6743, "eval_samples_per_second": 363.08, "eval_steps_per_second": 22.809, "step": 284 }, { "epoch": 3.02, "eval_loss": 0.20839156210422516, "eval_macro_f1": 0.1270678623977507, "eval_macro_precision": 0.15146020789583886, "eval_macro_recall": 0.11056835837347367, "eval_micro_f1": 0.5080789946140036, "eval_micro_precision": 0.7971830985915493, "eval_micro_recall": 0.37285902503293805, "eval_runtime": 2.6737, "eval_samples_per_second": 363.172, "eval_steps_per_second": 22.815, "step": 426 }, { "epoch": 3.55, "learning_rate": 4.760888771678367e-05, "loss": 0.3067, "step": 500 }, { "epoch": 4.03, "eval_loss": 0.17458897829055786, "eval_macro_f1": 0.2134043623340923, "eval_macro_precision": 0.2555480902740095, "eval_macro_recall": 0.1956241201896679, "eval_micro_f1": 0.652869972666927, "eval_micro_precision": 0.8015340364333653, "eval_micro_recall": 0.5507246376811594, "eval_runtime": 2.6714, "eval_samples_per_second": 363.484, "eval_steps_per_second": 22.835, "step": 568 }, { "epoch": 5.04, "eval_loss": 0.1594998985528946, "eval_macro_f1": 0.322947699403185, "eval_macro_precision": 0.33082406427783345, "eval_macro_recall": 0.31592054350156284, "eval_micro_f1": 0.7086383601756956, "eval_micro_precision": 0.7973640856672158, "eval_micro_recall": 0.6376811594202898, "eval_runtime": 2.6774, "eval_samples_per_second": 362.663, "eval_steps_per_second": 22.783, "step": 710 }, { "epoch": 6.04, "eval_loss": 0.14328011870384216, "eval_macro_f1": 0.35552541662372633, "eval_macro_precision": 0.4154000509380286, "eval_macro_recall": 0.3384352474665031, "eval_micro_f1": 0.730332967435053, "eval_micro_precision": 0.8213991769547325, "eval_micro_recall": 0.6574440052700923, "eval_runtime": 2.6759, "eval_samples_per_second": 362.867, "eval_steps_per_second": 22.796, "step": 852 }, { "epoch": 7.05, "eval_loss": 0.1372288167476654, "eval_macro_f1": 0.35769698646328396, "eval_macro_precision": 0.45729587282684003, "eval_macro_recall": 0.340956446807801, "eval_micro_f1": 0.747014115092291, "eval_micro_precision": 0.8289156626506025, "eval_micro_recall": 0.6798418972332015, "eval_runtime": 2.6665, "eval_samples_per_second": 364.154, "eval_steps_per_second": 22.877, "step": 994 }, { "epoch": 7.09, "learning_rate": 4.585856096249015e-05, "loss": 0.1366, "step": 1000 }, { "epoch": 8.06, "eval_loss": 0.13587501645088196, "eval_macro_f1": 0.4519999262177494, "eval_macro_precision": 0.6383482484756009, "eval_macro_recall": 0.4120606473790193, "eval_micro_f1": 0.7476635514018692, "eval_micro_precision": 0.8227848101265823, "eval_micro_recall": 0.6851119894598156, "eval_runtime": 2.6678, "eval_samples_per_second": 363.976, "eval_steps_per_second": 22.866, "step": 1136 }, { "epoch": 9.06, "eval_loss": 0.1254434585571289, "eval_macro_f1": 0.5752781473187552, "eval_macro_precision": 0.6682540412600851, "eval_macro_recall": 0.5248832453258128, "eval_micro_f1": 0.7869884575026234, "eval_micro_precision": 0.8389261744966443, "eval_micro_recall": 0.741106719367589, "eval_runtime": 2.6676, "eval_samples_per_second": 364.002, "eval_steps_per_second": 22.867, "step": 1278 }, { "epoch": 10.07, "eval_loss": 0.12716087698936462, "eval_macro_f1": 0.5783160913404322, "eval_macro_precision": 0.6669120855288475, "eval_macro_recall": 0.5333612306839322, "eval_micro_f1": 0.7954701441317777, "eval_micro_precision": 0.8302292263610315, "eval_micro_recall": 0.7635046113306982, "eval_runtime": 2.6683, "eval_samples_per_second": 363.896, "eval_steps_per_second": 22.861, "step": 1420 }, { "epoch": 10.64, "learning_rate": 4.410823420819664e-05, "loss": 0.0661, "step": 1500 }, { "epoch": 11.08, "eval_loss": 0.12210354208946228, "eval_macro_f1": 0.6129525814973475, "eval_macro_precision": 0.694661979564102, "eval_macro_recall": 0.5604968051311103, "eval_micro_f1": 0.8090971743625087, "eval_micro_precision": 0.8482658959537572, "eval_micro_recall": 0.7733860342555995, "eval_runtime": 2.6691, "eval_samples_per_second": 363.79, "eval_steps_per_second": 22.854, "step": 1562 }, { "epoch": 12.09, "eval_loss": 0.13834641873836517, "eval_macro_f1": 0.59964421079272, "eval_macro_precision": 0.6259545070217613, "eval_macro_recall": 0.5849130461175929, "eval_micro_f1": 0.7874705287975748, "eval_micro_precision": 0.8056512749827704, "eval_micro_recall": 0.7700922266139657, "eval_runtime": 2.6674, "eval_samples_per_second": 364.02, "eval_steps_per_second": 22.868, "step": 1704 }, { "epoch": 13.09, "eval_loss": 0.13302326202392578, "eval_macro_f1": 0.6249414362192053, "eval_macro_precision": 0.6603169616331872, "eval_macro_recall": 0.6008160113233191, "eval_micro_f1": 0.8105579685933846, "eval_micro_precision": 0.8223728813559322, "eval_micro_recall": 0.7990777338603425, "eval_runtime": 2.6684, "eval_samples_per_second": 363.887, "eval_steps_per_second": 22.86, "step": 1846 }, { "epoch": 14.1, "eval_loss": 0.13799144327640533, "eval_macro_f1": 0.6256821918613437, "eval_macro_precision": 0.6740205274811021, "eval_macro_recall": 0.5938987995613589, "eval_micro_f1": 0.8119891008174386, "eval_micro_precision": 0.840620592383639, "eval_micro_recall": 0.7852437417654808, "eval_runtime": 2.6691, "eval_samples_per_second": 363.79, "eval_steps_per_second": 22.854, "step": 1988 }, { "epoch": 14.18, "learning_rate": 4.235790745390312e-05, "loss": 0.0324, "step": 2000 }, { "epoch": 15.11, "eval_loss": 0.13957080245018005, "eval_macro_f1": 0.6541379860188454, "eval_macro_precision": 0.7002594602789083, "eval_macro_recall": 0.6359217043250158, "eval_micro_f1": 0.803843605036448, "eval_micro_precision": 0.8086666666666666, "eval_micro_recall": 0.7990777338603425, "eval_runtime": 2.6709, "eval_samples_per_second": 363.55, "eval_steps_per_second": 22.839, "step": 2130 }, { "epoch": 16.11, "eval_loss": 0.13600043952465057, "eval_macro_f1": 0.6528569810495737, "eval_macro_precision": 0.7119806310239326, "eval_macro_recall": 0.6199612374678921, "eval_micro_f1": 0.8169491525423729, "eval_micro_precision": 0.8414804469273743, "eval_micro_recall": 0.7938076416337286, "eval_runtime": 2.6686, "eval_samples_per_second": 363.864, "eval_steps_per_second": 22.859, "step": 2272 }, { "epoch": 17.12, "eval_loss": 0.1411595642566681, "eval_macro_f1": 0.6780053870985077, "eval_macro_precision": 0.7825949490722317, "eval_macro_recall": 0.6371295063080809, "eval_micro_f1": 0.8134328358208954, "eval_micro_precision": 0.8384615384615385, "eval_micro_recall": 0.7898550724637681, "eval_runtime": 2.6685, "eval_samples_per_second": 363.87, "eval_steps_per_second": 22.859, "step": 2414 }, { "epoch": 17.73, "learning_rate": 4.06075806996096e-05, "loss": 0.0173, "step": 2500 }, { "epoch": 18.13, "eval_loss": 0.14683738350868225, "eval_macro_f1": 0.6538188838769178, "eval_macro_precision": 0.7058131112592007, "eval_macro_recall": 0.628120629850237, "eval_micro_f1": 0.8044280442804428, "eval_micro_precision": 0.8195488721804511, "eval_micro_recall": 0.7898550724637681, "eval_runtime": 2.6704, "eval_samples_per_second": 363.611, "eval_steps_per_second": 22.843, "step": 2556 }, { "epoch": 19.13, "eval_loss": 0.14477181434631348, "eval_macro_f1": 0.7213223353389469, "eval_macro_precision": 0.7931053666626622, "eval_macro_recall": 0.6989155005450692, "eval_micro_f1": 0.8122731771692511, "eval_micro_precision": 0.813615333773959, "eval_micro_recall": 0.810935441370224, "eval_runtime": 2.6682, "eval_samples_per_second": 363.916, "eval_steps_per_second": 22.862, "step": 2698 }, { "epoch": 20.14, "eval_loss": 0.1553182750940323, "eval_macro_f1": 0.6767777822180807, "eval_macro_precision": 0.7296284296772766, "eval_macro_recall": 0.6640188299255232, "eval_micro_f1": 0.8082867477803354, "eval_micro_precision": 0.8069599474720945, "eval_micro_recall": 0.8096179183135704, "eval_runtime": 2.6706, "eval_samples_per_second": 363.587, "eval_steps_per_second": 22.841, "step": 2840 }, { "epoch": 21.15, "eval_loss": 0.14831620454788208, "eval_macro_f1": 0.6951923518777028, "eval_macro_precision": 0.8479068478364982, "eval_macro_recall": 0.6493756779822191, "eval_micro_f1": 0.8177506775067751, "eval_micro_precision": 0.8417015341701534, "eval_micro_recall": 0.7951251646903821, "eval_runtime": 2.6679, "eval_samples_per_second": 363.953, "eval_steps_per_second": 22.864, "step": 2982 }, { "epoch": 21.28, "learning_rate": 3.885725394531609e-05, "loss": 0.0121, "step": 3000 }, { "epoch": 22.16, "eval_loss": 0.1529681533575058, "eval_macro_f1": 0.7215214471733791, "eval_macro_precision": 0.840496134606828, "eval_macro_recall": 0.6777226713039917, "eval_micro_f1": 0.8099395567494962, "eval_micro_precision": 0.826027397260274, "eval_micro_recall": 0.7944664031620553, "eval_runtime": 2.6724, "eval_samples_per_second": 363.343, "eval_steps_per_second": 22.826, "step": 3124 }, { "epoch": 23.16, "eval_loss": 0.15208803117275238, "eval_macro_f1": 0.7282532116551124, "eval_macro_precision": 0.8178655579947314, "eval_macro_recall": 0.702450635375965, "eval_micro_f1": 0.8099009900990098, "eval_micro_precision": 0.8115079365079365, "eval_micro_recall": 0.808300395256917, "eval_runtime": 2.6735, "eval_samples_per_second": 363.196, "eval_steps_per_second": 22.817, "step": 3266 }, { "epoch": 24.17, "eval_loss": 0.17097046971321106, "eval_macro_f1": 0.6781929633024913, "eval_macro_precision": 0.7664743620916477, "eval_macro_recall": 0.647287649907105, "eval_micro_f1": 0.8018836192398252, "eval_micro_precision": 0.8192439862542955, "eval_micro_recall": 0.7852437417654808, "eval_runtime": 2.6679, "eval_samples_per_second": 363.956, "eval_steps_per_second": 22.864, "step": 3408 }, { "epoch": 24.82, "learning_rate": 3.710692719102257e-05, "loss": 0.0086, "step": 3500 }, { "epoch": 25.18, "eval_loss": 0.16351111233234406, "eval_macro_f1": 0.7009939961760294, "eval_macro_precision": 0.8049793687947511, "eval_macro_recall": 0.6631263784729529, "eval_micro_f1": 0.8080672268907563, "eval_micro_precision": 0.8249828414550446, "eval_micro_recall": 0.7918313570487484, "eval_runtime": 2.6758, "eval_samples_per_second": 362.882, "eval_steps_per_second": 22.797, "step": 3550 }, { "epoch": 26.18, "eval_loss": 0.1703951209783554, "eval_macro_f1": 0.728858993061112, "eval_macro_precision": 0.8293166501604144, "eval_macro_recall": 0.6848344575219967, "eval_micro_f1": 0.8101945003353456, "eval_micro_precision": 0.825136612021858, "eval_micro_recall": 0.7957839262187089, "eval_runtime": 2.6853, "eval_samples_per_second": 361.599, "eval_steps_per_second": 22.716, "step": 3692 }, { "epoch": 27.19, "eval_loss": 0.1729104220867157, "eval_macro_f1": 0.7246800474910258, "eval_macro_precision": 0.8088443156400115, "eval_macro_recall": 0.6938957592472167, "eval_micro_f1": 0.8112827400940228, "eval_micro_precision": 0.8273972602739726, "eval_micro_recall": 0.7957839262187089, "eval_runtime": 2.6704, "eval_samples_per_second": 363.619, "eval_steps_per_second": 22.843, "step": 3834 }, { "epoch": 28.2, "eval_loss": 0.17421075701713562, "eval_macro_f1": 0.721641920467399, "eval_macro_precision": 0.7953793533738551, "eval_macro_recall": 0.6939586213926427, "eval_micro_f1": 0.8118745830553702, "eval_micro_precision": 0.8222972972972973, "eval_micro_recall": 0.8017127799736495, "eval_runtime": 2.6674, "eval_samples_per_second": 364.019, "eval_steps_per_second": 22.868, "step": 3976 }, { "epoch": 28.37, "learning_rate": 3.535660043672905e-05, "loss": 0.0054, "step": 4000 }, { "epoch": 29.21, "eval_loss": 0.17936377227306366, "eval_macro_f1": 0.7165402537125084, "eval_macro_precision": 0.7314012003015316, "eval_macro_recall": 0.7193045979731636, "eval_micro_f1": 0.8045826513911619, "eval_micro_precision": 0.7996096291476903, "eval_micro_recall": 0.8096179183135704, "eval_runtime": 2.669, "eval_samples_per_second": 363.809, "eval_steps_per_second": 22.855, "step": 4118 }, { "epoch": 30.21, "eval_loss": 0.184128999710083, "eval_macro_f1": 0.7249069877656021, "eval_macro_precision": 0.7920679958383108, "eval_macro_recall": 0.6986794530201526, "eval_micro_f1": 0.8046822742474916, "eval_micro_precision": 0.8172554347826086, "eval_micro_recall": 0.7924901185770751, "eval_runtime": 2.6699, "eval_samples_per_second": 363.687, "eval_steps_per_second": 22.847, "step": 4260 }, { "epoch": 31.22, "eval_loss": 0.18270088732242584, "eval_macro_f1": 0.7341637485973148, "eval_macro_precision": 0.8378115033399074, "eval_macro_recall": 0.6892808840460984, "eval_micro_f1": 0.8134680134680136, "eval_micro_precision": 0.8319559228650137, "eval_micro_recall": 0.7957839262187089, "eval_runtime": 2.6693, "eval_samples_per_second": 363.77, "eval_steps_per_second": 22.853, "step": 4402 }, { "epoch": 31.91, "learning_rate": 3.3606273682435536e-05, "loss": 0.0035, "step": 4500 }, { "epoch": 32.23, "eval_loss": 0.18414482474327087, "eval_macro_f1": 0.7314425663595913, "eval_macro_precision": 0.8099477622958757, "eval_macro_recall": 0.6986944147838622, "eval_micro_f1": 0.8134003350083752, "eval_micro_precision": 0.8275391956373551, "eval_micro_recall": 0.7997364953886693, "eval_runtime": 2.6686, "eval_samples_per_second": 363.857, "eval_steps_per_second": 22.858, "step": 4544 }, { "epoch": 33.23, "eval_loss": 0.18519891798496246, "eval_macro_f1": 0.7352936873025266, "eval_macro_precision": 0.8265688293622399, "eval_macro_recall": 0.6958315897622773, "eval_micro_f1": 0.8212722988892629, "eval_micro_precision": 0.8396421197522368, "eval_micro_recall": 0.8036890645586298, "eval_runtime": 2.6687, "eval_samples_per_second": 363.853, "eval_steps_per_second": 22.858, "step": 4686 }, { "epoch": 34.24, "eval_loss": 0.1865757256746292, "eval_macro_f1": 0.7207303759640109, "eval_macro_precision": 0.7610227795725935, "eval_macro_recall": 0.6996289765073358, "eval_micro_f1": 0.8130245048674052, "eval_micro_precision": 0.8288843258042436, "eval_micro_recall": 0.7977602108036891, "eval_runtime": 2.6701, "eval_samples_per_second": 363.659, "eval_steps_per_second": 22.846, "step": 4828 }, { "epoch": 35.25, "eval_loss": 0.19158615171909332, "eval_macro_f1": 0.7231833700345036, "eval_macro_precision": 0.759857266859788, "eval_macro_recall": 0.7038379676451936, "eval_micro_f1": 0.8071928071928073, "eval_micro_precision": 0.8161616161616162, "eval_micro_recall": 0.7984189723320159, "eval_runtime": 2.6696, "eval_samples_per_second": 363.724, "eval_steps_per_second": 22.85, "step": 4970 }, { "epoch": 35.46, "learning_rate": 3.185594692814201e-05, "loss": 0.0025, "step": 5000 }, { "epoch": 36.26, "eval_loss": 0.18590226769447327, "eval_macro_f1": 0.7370102490601179, "eval_macro_precision": 0.8141501549264045, "eval_macro_recall": 0.7143003391573518, "eval_micro_f1": 0.8030253206182177, "eval_micro_precision": 0.8017071569271176, "eval_micro_recall": 0.8043478260869565, "eval_runtime": 2.6693, "eval_samples_per_second": 363.761, "eval_steps_per_second": 22.852, "step": 5112 } ], "max_steps": 14100, "num_train_epochs": 100, "total_flos": 1.9741043739581184e+16, "trial_name": null, "trial_params": { "adam_epsilon": 2.7636948844125687e-08, "learning_rate": 4.7699904708006934e-05, "per_device_eval_batch_size": 16, "per_device_train_batch_size": 16, "seed": 320, "warmup_steps": 474, "weight_decay": 0.08343382340090989 } }