{ "best_metric": 0.8164585392148055, "best_model_checkpoint": "./XLM-V-multi-outputs/checkpoint-40000", "epoch": 19.82300884955752, "eval_steps": 1000, "global_step": 56000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.35398230088495575, "grad_norm": 1.4846587181091309, "learning_rate": 3.5398230088495575e-07, "loss": 0.6947, "step": 1000 }, { "epoch": 0.35398230088495575, "eval_accuracy": 0.5090112516180424, "eval_f1": 0.6746288353678654, "eval_loss": 0.6931052207946777, "eval_precision": 0.5090112516180424, "eval_recall": 1.0, "eval_runtime": 53.8213, "eval_samples_per_second": 186.599, "eval_steps_per_second": 5.834, "step": 1000 }, { "epoch": 0.7079646017699115, "grad_norm": 3.2122230529785156, "learning_rate": 7.079646017699115e-07, "loss": 0.6924, "step": 2000 }, { "epoch": 0.7079646017699115, "eval_accuracy": 0.5752265259384646, "eval_f1": 0.698601102161933, "eval_loss": 0.681232213973999, "eval_precision": 0.5467816854678168, "eval_recall": 0.9671361502347418, "eval_runtime": 53.8929, "eval_samples_per_second": 186.351, "eval_steps_per_second": 5.826, "step": 2000 }, { "epoch": 1.0619469026548674, "grad_norm": 4.78421688079834, "learning_rate": 1.0619469026548673e-06, "loss": 0.6783, "step": 3000 }, { "epoch": 1.0619469026548674, "eval_accuracy": 0.6099770984765508, "eval_f1": 0.7189092213850018, "eval_loss": 0.6526923179626465, "eval_precision": 0.5677207299104613, "eval_recall": 0.9798513302034428, "eval_runtime": 53.7592, "eval_samples_per_second": 186.814, "eval_steps_per_second": 5.841, "step": 3000 }, { "epoch": 1.415929203539823, "grad_norm": 49.178401947021484, "learning_rate": 1.415929203539823e-06, "loss": 0.6613, "step": 4000 }, { "epoch": 1.415929203539823, "eval_accuracy": 0.6256098775266354, "eval_f1": 0.7167821633021995, "eval_loss": 0.633762538433075, "eval_precision": 0.5828025477707006, "eval_recall": 0.9307511737089202, "eval_runtime": 53.5833, "eval_samples_per_second": 187.428, "eval_steps_per_second": 5.86, "step": 4000 }, { "epoch": 1.7699115044247788, "grad_norm": 11.674603462219238, "learning_rate": 1.7699115044247788e-06, "loss": 0.6551, "step": 5000 }, { "epoch": 1.7699115044247788, "eval_accuracy": 0.6342726277008862, "eval_f1": 0.7293493478741434, "eval_loss": 0.6212999820709229, "eval_precision": 0.5850573353824329, "eval_recall": 0.968114241001565, "eval_runtime": 53.825, "eval_samples_per_second": 186.586, "eval_steps_per_second": 5.834, "step": 5000 }, { "epoch": 2.1238938053097347, "grad_norm": 5.5286545753479, "learning_rate": 1.986234021632252e-06, "loss": 0.6481, "step": 6000 }, { "epoch": 2.1238938053097347, "eval_accuracy": 0.656078860898138, "eval_f1": 0.740222623345367, "eval_loss": 0.6046485900878906, "eval_precision": 0.6012952101661779, "eval_recall": 0.9626369327073553, "eval_runtime": 53.8663, "eval_samples_per_second": 186.443, "eval_steps_per_second": 5.829, "step": 6000 }, { "epoch": 2.47787610619469, "grad_norm": 47.20358657836914, "learning_rate": 1.9469026548672563e-06, "loss": 0.639, "step": 7000 }, { "epoch": 2.47787610619469, "eval_accuracy": 0.6773872348899731, "eval_f1": 0.7519142419601837, "eval_loss": 0.5804159045219421, "eval_precision": 0.6177654755913438, "eval_recall": 0.9604851330203443, "eval_runtime": 53.8098, "eval_samples_per_second": 186.639, "eval_steps_per_second": 5.835, "step": 7000 }, { "epoch": 2.831858407079646, "grad_norm": 20.222881317138672, "learning_rate": 1.9075712881022612e-06, "loss": 0.63, "step": 8000 }, { "epoch": 2.831858407079646, "eval_accuracy": 0.6816688240565568, "eval_f1": 0.6923299008757579, "eval_loss": 0.584140419960022, "eval_precision": 0.6813790490623224, "eval_recall": 0.7036384976525821, "eval_runtime": 53.9333, "eval_samples_per_second": 186.211, "eval_steps_per_second": 5.822, "step": 8000 }, { "epoch": 3.185840707964602, "grad_norm": 8.778471946716309, "learning_rate": 1.8682399213372663e-06, "loss": 0.6087, "step": 9000 }, { "epoch": 3.185840707964602, "eval_accuracy": 0.7025789106840585, "eval_f1": 0.747271342753194, "eval_loss": 0.5566529035568237, "eval_precision": 0.6584165796928582, "eval_recall": 0.863849765258216, "eval_runtime": 53.826, "eval_samples_per_second": 186.583, "eval_steps_per_second": 5.834, "step": 9000 }, { "epoch": 3.5398230088495577, "grad_norm": 16.00111198425293, "learning_rate": 1.8289085545722714e-06, "loss": 0.5851, "step": 10000 }, { "epoch": 3.5398230088495577, "eval_accuracy": 0.708553221149059, "eval_f1": 0.7571155920670484, "eval_loss": 0.5440040826797485, "eval_precision": 0.6574434356535523, "eval_recall": 0.8924100156494522, "eval_runtime": 53.8534, "eval_samples_per_second": 186.488, "eval_steps_per_second": 5.831, "step": 10000 }, { "epoch": 3.893805309734513, "grad_norm": 4.33933162689209, "learning_rate": 1.7895771878072763e-06, "loss": 0.5516, "step": 11000 }, { "epoch": 3.893805309734513, "eval_accuracy": 0.7223937070596436, "eval_f1": 0.7767456758488148, "eval_loss": 0.4870859384536743, "eval_precision": 0.6575379609544468, "eval_recall": 0.9487480438184663, "eval_runtime": 53.9228, "eval_samples_per_second": 186.248, "eval_steps_per_second": 5.823, "step": 11000 }, { "epoch": 4.247787610619469, "grad_norm": 13.392922401428223, "learning_rate": 1.7502458210422812e-06, "loss": 0.495, "step": 12000 }, { "epoch": 4.247787610619469, "eval_accuracy": 0.7339440406253112, "eval_f1": 0.7293903180068868, "eval_loss": 0.43779730796813965, "eval_precision": 0.7561948761024779, "eval_recall": 0.7044209702660407, "eval_runtime": 53.9151, "eval_samples_per_second": 186.274, "eval_steps_per_second": 5.824, "step": 12000 }, { "epoch": 4.601769911504425, "grad_norm": 6.77086067199707, "learning_rate": 1.7109144542772859e-06, "loss": 0.4655, "step": 13000 }, { "epoch": 4.601769911504425, "eval_accuracy": 0.7499751070397291, "eval_f1": 0.7768991559306975, "eval_loss": 0.41514188051223755, "eval_precision": 0.7117043789679309, "eval_recall": 0.8552425665101722, "eval_runtime": 53.8935, "eval_samples_per_second": 186.349, "eval_steps_per_second": 5.826, "step": 13000 }, { "epoch": 4.95575221238938, "grad_norm": 6.903416156768799, "learning_rate": 1.671583087512291e-06, "loss": 0.4451, "step": 14000 }, { "epoch": 4.95575221238938, "eval_accuracy": 0.7510703972916459, "eval_f1": 0.7728511720879521, "eval_loss": 0.4052245318889618, "eval_precision": 0.7215812690872073, "eval_recall": 0.831964006259781, "eval_runtime": 53.8879, "eval_samples_per_second": 186.368, "eval_steps_per_second": 5.827, "step": 14000 }, { "epoch": 5.3097345132743365, "grad_norm": 24.238162994384766, "learning_rate": 1.6322517207472959e-06, "loss": 0.4275, "step": 15000 }, { "epoch": 5.3097345132743365, "eval_accuracy": 0.7568455640744798, "eval_f1": 0.7681792291627112, "eval_loss": 0.3967246115207672, "eval_precision": 0.7462191073404648, "eval_recall": 0.7914710485133021, "eval_runtime": 53.89, "eval_samples_per_second": 186.361, "eval_steps_per_second": 5.827, "step": 15000 }, { "epoch": 5.663716814159292, "grad_norm": 22.419418334960938, "learning_rate": 1.5929203539823008e-06, "loss": 0.4085, "step": 16000 }, { "epoch": 5.663716814159292, "eval_accuracy": 0.7561485611868963, "eval_f1": 0.7775456444727041, "eval_loss": 0.39186546206474304, "eval_precision": 0.7257927759877904, "eval_recall": 0.837245696400626, "eval_runtime": 54.0231, "eval_samples_per_second": 185.902, "eval_steps_per_second": 5.812, "step": 16000 }, { "epoch": 6.017699115044247, "grad_norm": 25.710134506225586, "learning_rate": 1.5535889872173059e-06, "loss": 0.4088, "step": 17000 }, { "epoch": 6.017699115044247, "eval_accuracy": 0.7595340037837299, "eval_f1": 0.7705027083531313, "eval_loss": 0.3929796516895294, "eval_precision": 0.7492145629273702, "eval_recall": 0.793035993740219, "eval_runtime": 53.8892, "eval_samples_per_second": 186.364, "eval_steps_per_second": 5.827, "step": 17000 }, { "epoch": 6.371681415929204, "grad_norm": 12.00070571899414, "learning_rate": 1.5142576204523108e-06, "loss": 0.3982, "step": 18000 }, { "epoch": 6.371681415929204, "eval_accuracy": 0.756347704869063, "eval_f1": 0.7932753231393089, "eval_loss": 0.38643109798431396, "eval_precision": 0.6981412639405205, "eval_recall": 0.9184272300469484, "eval_runtime": 53.924, "eval_samples_per_second": 186.243, "eval_steps_per_second": 5.823, "step": 18000 }, { "epoch": 6.725663716814159, "grad_norm": 8.227592468261719, "learning_rate": 1.4749262536873155e-06, "loss": 0.3952, "step": 19000 }, { "epoch": 6.725663716814159, "eval_accuracy": 0.7651100268843971, "eval_f1": 0.776673293571902, "eval_loss": 0.3926367461681366, "eval_precision": 0.7525224729407448, "eval_recall": 0.8024256651017214, "eval_runtime": 53.9659, "eval_samples_per_second": 186.099, "eval_steps_per_second": 5.818, "step": 19000 }, { "epoch": 7.079646017699115, "grad_norm": 5.537351131439209, "learning_rate": 1.4355948869223204e-06, "loss": 0.3876, "step": 20000 }, { "epoch": 7.079646017699115, "eval_accuracy": 0.763915164791397, "eval_f1": 0.792472647702407, "eval_loss": 0.38483595848083496, "eval_precision": 0.7170917155076826, "eval_recall": 0.8855633802816901, "eval_runtime": 53.9366, "eval_samples_per_second": 186.2, "eval_steps_per_second": 5.822, "step": 20000 }, { "epoch": 7.433628318584071, "grad_norm": 4.749475002288818, "learning_rate": 1.3962635201573255e-06, "loss": 0.3832, "step": 21000 }, { "epoch": 7.433628318584071, "eval_accuracy": 0.7616250124464802, "eval_f1": 0.7432432432432432, "eval_loss": 0.38059765100479126, "eval_precision": 0.8226495726495726, "eval_recall": 0.6778169014084507, "eval_runtime": 53.9079, "eval_samples_per_second": 186.299, "eval_steps_per_second": 5.825, "step": 21000 }, { "epoch": 7.787610619469026, "grad_norm": 9.643062591552734, "learning_rate": 1.3569321533923304e-06, "loss": 0.3778, "step": 22000 }, { "epoch": 7.787610619469026, "eval_accuracy": 0.7705864781439808, "eval_f1": 0.7876889052709178, "eval_loss": 0.37772777676582336, "eval_precision": 0.7445993031358885, "eval_recall": 0.8360719874804382, "eval_runtime": 53.9925, "eval_samples_per_second": 186.007, "eval_steps_per_second": 5.816, "step": 22000 }, { "epoch": 8.141592920353983, "grad_norm": 5.115036487579346, "learning_rate": 1.3176007866273352e-06, "loss": 0.3832, "step": 23000 }, { "epoch": 8.141592920353983, "eval_accuracy": 0.7656078860898138, "eval_f1": 0.7999320074791773, "eval_loss": 0.3883359134197235, "eval_precision": 0.7072437631499849, "eval_recall": 0.9205790297339593, "eval_runtime": 53.8543, "eval_samples_per_second": 186.485, "eval_steps_per_second": 5.831, "step": 23000 }, { "epoch": 8.495575221238939, "grad_norm": 3.359361410140991, "learning_rate": 1.2782694198623401e-06, "loss": 0.3723, "step": 24000 }, { "epoch": 8.495575221238939, "eval_accuracy": 0.7721796276013143, "eval_f1": 0.7823853909073616, "eval_loss": 0.3792983293533325, "eval_precision": 0.7613846723435764, "eval_recall": 0.8045774647887324, "eval_runtime": 53.9123, "eval_samples_per_second": 186.284, "eval_steps_per_second": 5.824, "step": 24000 }, { "epoch": 8.849557522123893, "grad_norm": 4.47456169128418, "learning_rate": 1.238938053097345e-06, "loss": 0.369, "step": 25000 }, { "epoch": 8.849557522123893, "eval_accuracy": 0.7759633575624814, "eval_f1": 0.7839446898405992, "eval_loss": 0.37068039178848267, "eval_precision": 0.7698981516408903, "eval_recall": 0.7985133020344288, "eval_runtime": 53.9248, "eval_samples_per_second": 186.241, "eval_steps_per_second": 5.823, "step": 25000 }, { "epoch": 9.20353982300885, "grad_norm": 8.337353706359863, "learning_rate": 1.19960668633235e-06, "loss": 0.3683, "step": 26000 }, { "epoch": 9.20353982300885, "eval_accuracy": 0.7734740615353978, "eval_f1": 0.7609540821687507, "eval_loss": 0.3703781068325043, "eval_precision": 0.8220204313280364, "eval_recall": 0.7083333333333334, "eval_runtime": 53.895, "eval_samples_per_second": 186.344, "eval_steps_per_second": 5.826, "step": 26000 }, { "epoch": 9.557522123893806, "grad_norm": 8.439238548278809, "learning_rate": 1.1602753195673548e-06, "loss": 0.362, "step": 27000 }, { "epoch": 9.557522123893806, "eval_accuracy": 0.7749676391516479, "eval_f1": 0.793644996347699, "eval_loss": 0.3685624301433563, "eval_precision": 0.7441780821917808, "eval_recall": 0.8501564945226917, "eval_runtime": 53.8072, "eval_samples_per_second": 186.648, "eval_steps_per_second": 5.836, "step": 27000 }, { "epoch": 9.91150442477876, "grad_norm": 9.439690589904785, "learning_rate": 1.12094395280236e-06, "loss": 0.3566, "step": 28000 }, { "epoch": 9.91150442477876, "eval_accuracy": 0.7782535099073982, "eval_f1": 0.7853080111828786, "eval_loss": 0.3649461269378662, "eval_precision": 0.7741874168409048, "eval_recall": 0.7967527386541471, "eval_runtime": 53.8844, "eval_samples_per_second": 186.38, "eval_steps_per_second": 5.827, "step": 28000 }, { "epoch": 10.265486725663717, "grad_norm": 9.049650192260742, "learning_rate": 1.0816125860373648e-06, "loss": 0.3554, "step": 29000 }, { "epoch": 10.265486725663717, "eval_accuracy": 0.7810415214577318, "eval_f1": 0.7799899949974988, "eval_loss": 0.3608804941177368, "eval_precision": 0.7982797460577514, "eval_recall": 0.7625195618153364, "eval_runtime": 53.8179, "eval_samples_per_second": 186.611, "eval_steps_per_second": 5.834, "step": 29000 }, { "epoch": 10.619469026548673, "grad_norm": 5.94489860534668, "learning_rate": 1.0422812192723695e-06, "loss": 0.3486, "step": 30000 }, { "epoch": 10.619469026548673, "eval_accuracy": 0.7845265358956487, "eval_f1": 0.7943747624477385, "eval_loss": 0.3635348081588745, "eval_precision": 0.7723577235772358, "eval_recall": 0.8176838810641628, "eval_runtime": 53.7295, "eval_samples_per_second": 186.918, "eval_steps_per_second": 5.844, "step": 30000 }, { "epoch": 10.973451327433628, "grad_norm": 22.234378814697266, "learning_rate": 1.0029498525073746e-06, "loss": 0.3506, "step": 31000 }, { "epoch": 10.973451327433628, "eval_accuracy": 0.7844269640545654, "eval_f1": 0.7878075075958052, "eval_loss": 0.3596493601799011, "eval_precision": 0.7894323315655077, "eval_recall": 0.786189358372457, "eval_runtime": 53.8293, "eval_samples_per_second": 186.571, "eval_steps_per_second": 5.833, "step": 31000 }, { "epoch": 11.327433628318584, "grad_norm": 12.47771167755127, "learning_rate": 9.636184857423795e-07, "loss": 0.3441, "step": 32000 }, { "epoch": 11.327433628318584, "eval_accuracy": 0.7883102658568157, "eval_f1": 0.785901309164149, "eval_loss": 0.35861310362815857, "eval_precision": 0.8098796180987962, "eval_recall": 0.763302034428795, "eval_runtime": 53.7987, "eval_samples_per_second": 186.677, "eval_steps_per_second": 5.837, "step": 32000 }, { "epoch": 11.68141592920354, "grad_norm": 13.281210899353027, "learning_rate": 9.242871189773844e-07, "loss": 0.3416, "step": 33000 }, { "epoch": 11.68141592920354, "eval_accuracy": 0.7906004182017325, "eval_f1": 0.7919263876521223, "eval_loss": 0.3558104336261749, "eval_precision": 0.8012012012012012, "eval_recall": 0.7828638497652582, "eval_runtime": 53.8593, "eval_samples_per_second": 186.467, "eval_steps_per_second": 5.83, "step": 33000 }, { "epoch": 12.035398230088495, "grad_norm": 13.627979278564453, "learning_rate": 8.849557522123894e-07, "loss": 0.3419, "step": 34000 }, { "epoch": 12.035398230088495, "eval_accuracy": 0.7892064124265658, "eval_f1": 0.8029781293624941, "eval_loss": 0.35980644822120667, "eval_precision": 0.7658441327889224, "eval_recall": 0.8438967136150235, "eval_runtime": 53.8953, "eval_samples_per_second": 186.343, "eval_steps_per_second": 5.826, "step": 34000 }, { "epoch": 12.389380530973451, "grad_norm": 23.55402374267578, "learning_rate": 8.456243854473943e-07, "loss": 0.3306, "step": 35000 }, { "epoch": 12.389380530973451, "eval_accuracy": 0.7884098376978991, "eval_f1": 0.7980997624703088, "eval_loss": 0.353899210691452, "eval_precision": 0.775909846665435, "eval_recall": 0.8215962441314554, "eval_runtime": 53.8141, "eval_samples_per_second": 186.624, "eval_steps_per_second": 5.835, "step": 35000 }, { "epoch": 12.743362831858407, "grad_norm": 18.79817771911621, "learning_rate": 8.062930186823992e-07, "loss": 0.335, "step": 36000 }, { "epoch": 12.743362831858407, "eval_accuracy": 0.7951807228915663, "eval_f1": 0.7915695612524065, "eval_loss": 0.35350722074508667, "eval_precision": 0.8211057389110784, "eval_recall": 0.7640845070422535, "eval_runtime": 53.8902, "eval_samples_per_second": 186.36, "eval_steps_per_second": 5.827, "step": 36000 }, { "epoch": 13.097345132743364, "grad_norm": 5.215780735015869, "learning_rate": 7.669616519174042e-07, "loss": 0.3286, "step": 37000 }, { "epoch": 13.097345132743364, "eval_accuracy": 0.7942845763218161, "eval_f1": 0.8051678611844587, "eval_loss": 0.35424739122390747, "eval_precision": 0.7773124544792426, "eval_recall": 0.835093896713615, "eval_runtime": 53.8955, "eval_samples_per_second": 186.342, "eval_steps_per_second": 5.826, "step": 37000 }, { "epoch": 13.451327433628318, "grad_norm": 15.066843032836914, "learning_rate": 7.27630285152409e-07, "loss": 0.3239, "step": 38000 }, { "epoch": 13.451327433628318, "eval_accuracy": 0.7946828636861496, "eval_f1": 0.8016544824932667, "eval_loss": 0.3558191955089569, "eval_precision": 0.7886071158213475, "eval_recall": 0.8151408450704225, "eval_runtime": 53.8826, "eval_samples_per_second": 186.387, "eval_steps_per_second": 5.827, "step": 38000 }, { "epoch": 13.805309734513274, "grad_norm": 17.407468795776367, "learning_rate": 6.88298918387414e-07, "loss": 0.3231, "step": 39000 }, { "epoch": 13.805309734513274, "eval_accuracy": 0.7991635965348999, "eval_f1": 0.7988431235663708, "eval_loss": 0.351671427488327, "eval_precision": 0.814852492370295, "eval_recall": 0.7834507042253521, "eval_runtime": 53.9126, "eval_samples_per_second": 186.283, "eval_steps_per_second": 5.824, "step": 39000 }, { "epoch": 14.15929203539823, "grad_norm": 9.669415473937988, "learning_rate": 6.489675516224188e-07, "loss": 0.323, "step": 40000 }, { "epoch": 14.15929203539823, "eval_accuracy": 0.7965747286667331, "eval_f1": 0.8164585392148055, "eval_loss": 0.34596434235572815, "eval_precision": 0.7549426815085563, "eval_recall": 0.8888888888888888, "eval_runtime": 53.8184, "eval_samples_per_second": 186.609, "eval_steps_per_second": 5.834, "step": 40000 }, { "epoch": 14.513274336283185, "grad_norm": 12.53975772857666, "learning_rate": 6.096361848574237e-07, "loss": 0.3158, "step": 41000 }, { "epoch": 14.513274336283185, "eval_accuracy": 0.7998605994224833, "eval_f1": 0.8066192033865692, "eval_loss": 0.35504859685897827, "eval_precision": 0.7936387731919727, "eval_recall": 0.8200312989045383, "eval_runtime": 53.7599, "eval_samples_per_second": 186.812, "eval_steps_per_second": 5.841, "step": 41000 }, { "epoch": 14.867256637168142, "grad_norm": 20.021963119506836, "learning_rate": 5.703048180924287e-07, "loss": 0.3126, "step": 42000 }, { "epoch": 14.867256637168142, "eval_accuracy": 0.8029473264960669, "eval_f1": 0.7949435291679618, "eval_loss": 0.34774094820022583, "eval_precision": 0.8451200705001102, "eval_recall": 0.7503912363067292, "eval_runtime": 53.9679, "eval_samples_per_second": 186.092, "eval_steps_per_second": 5.818, "step": 42000 }, { "epoch": 15.221238938053098, "grad_norm": 27.91417694091797, "learning_rate": 5.309734513274336e-07, "loss": 0.3135, "step": 43000 }, { "epoch": 15.221238938053098, "eval_accuracy": 0.8033456138604003, "eval_f1": 0.8052076141631325, "eval_loss": 0.346435546875, "eval_precision": 0.8120151183608514, "eval_recall": 0.7985133020344288, "eval_runtime": 53.8793, "eval_samples_per_second": 186.398, "eval_steps_per_second": 5.828, "step": 43000 }, { "epoch": 15.575221238938052, "grad_norm": 9.588032722473145, "learning_rate": 4.916420845624385e-07, "loss": 0.3095, "step": 44000 }, { "epoch": 15.575221238938052, "eval_accuracy": 0.8035447575425669, "eval_f1": 0.8067391517288667, "eval_loss": 0.34088295698165894, "eval_precision": 0.807926231116343, "eval_recall": 0.8055555555555556, "eval_runtime": 53.8032, "eval_samples_per_second": 186.662, "eval_steps_per_second": 5.836, "step": 44000 }, { "epoch": 15.929203539823009, "grad_norm": 10.44812297821045, "learning_rate": 4.5231071779744347e-07, "loss": 0.3099, "step": 45000 }, { "epoch": 15.929203539823009, "eval_accuracy": 0.80065717415115, "eval_f1": 0.8102729340409401, "eval_loss": 0.3472129702568054, "eval_precision": 0.7858455882352942, "eval_recall": 0.8362676056338029, "eval_runtime": 53.8919, "eval_samples_per_second": 186.355, "eval_steps_per_second": 5.826, "step": 45000 }, { "epoch": 16.283185840707965, "grad_norm": 10.668062210083008, "learning_rate": 4.1297935103244836e-07, "loss": 0.3056, "step": 46000 }, { "epoch": 16.283185840707965, "eval_accuracy": 0.8055361943642338, "eval_f1": 0.7982229569170369, "eval_loss": 0.3375505208969116, "eval_precision": 0.8458506678344646, "eval_recall": 0.7556729264475743, "eval_runtime": 54.0333, "eval_samples_per_second": 185.867, "eval_steps_per_second": 5.811, "step": 46000 }, { "epoch": 16.63716814159292, "grad_norm": 62.42535400390625, "learning_rate": 3.7364798426745325e-07, "loss": 0.2983, "step": 47000 }, { "epoch": 16.63716814159292, "eval_accuracy": 0.8077267748680673, "eval_f1": 0.8059491508391117, "eval_loss": 0.34593281149864197, "eval_precision": 0.8286836123165944, "eval_recall": 0.7844287949921753, "eval_runtime": 53.7142, "eval_samples_per_second": 186.971, "eval_steps_per_second": 5.846, "step": 47000 }, { "epoch": 16.991150442477878, "grad_norm": 20.520849227905273, "learning_rate": 3.343166175024582e-07, "loss": 0.2974, "step": 48000 }, { "epoch": 16.991150442477878, "eval_accuracy": 0.8077267748680673, "eval_f1": 0.810295706847431, "eval_loss": 0.34830448031425476, "eval_precision": 0.8138938227748175, "eval_recall": 0.8067292644757433, "eval_runtime": 53.7486, "eval_samples_per_second": 186.851, "eval_steps_per_second": 5.842, "step": 48000 }, { "epoch": 17.345132743362832, "grad_norm": 10.991024017333984, "learning_rate": 2.949852507374631e-07, "loss": 0.2966, "step": 49000 }, { "epoch": 17.345132743362832, "eval_accuracy": 0.8075276311859007, "eval_f1": 0.8143666570632863, "eval_loss": 0.35505732893943787, "eval_precision": 0.7998490850782871, "eval_recall": 0.8294209702660407, "eval_runtime": 53.9387, "eval_samples_per_second": 186.193, "eval_steps_per_second": 5.821, "step": 49000 }, { "epoch": 17.699115044247787, "grad_norm": 16.205684661865234, "learning_rate": 2.55653883972468e-07, "loss": 0.2974, "step": 50000 }, { "epoch": 17.699115044247787, "eval_accuracy": 0.8090212088021508, "eval_f1": 0.8085446196845678, "eval_loss": 0.3468624949455261, "eval_precision": 0.8255197717081125, "eval_recall": 0.7922535211267606, "eval_runtime": 53.9814, "eval_samples_per_second": 186.045, "eval_steps_per_second": 5.817, "step": 50000 }, { "epoch": 18.053097345132745, "grad_norm": 11.926201820373535, "learning_rate": 2.1632251720747293e-07, "loss": 0.2937, "step": 51000 }, { "epoch": 18.053097345132745, "eval_accuracy": 0.807925918550234, "eval_f1": 0.8064224786753638, "eval_loss": 0.3447153568267822, "eval_precision": 0.8279414794972182, "eval_recall": 0.7859937402190923, "eval_runtime": 53.9266, "eval_samples_per_second": 186.235, "eval_steps_per_second": 5.823, "step": 51000 }, { "epoch": 18.4070796460177, "grad_norm": 8.203916549682617, "learning_rate": 1.7699115044247788e-07, "loss": 0.2902, "step": 52000 }, { "epoch": 18.4070796460177, "eval_accuracy": 0.808224634073484, "eval_f1": 0.8113983548766157, "eval_loss": 0.34868165850639343, "eval_precision": 0.8123529411764706, "eval_recall": 0.8104460093896714, "eval_runtime": 53.9322, "eval_samples_per_second": 186.215, "eval_steps_per_second": 5.822, "step": 52000 }, { "epoch": 18.761061946902654, "grad_norm": 15.017583847045898, "learning_rate": 1.376597836774828e-07, "loss": 0.2969, "step": 53000 }, { "epoch": 18.761061946902654, "eval_accuracy": 0.8092203524843175, "eval_f1": 0.8131825273010921, "eval_loss": 0.343014657497406, "eval_precision": 0.8106531881804043, "eval_recall": 0.8157276995305164, "eval_runtime": 53.6911, "eval_samples_per_second": 187.051, "eval_steps_per_second": 5.848, "step": 53000 }, { "epoch": 19.115044247787612, "grad_norm": 17.386789321899414, "learning_rate": 9.83284169124877e-08, "loss": 0.2913, "step": 54000 }, { "epoch": 19.115044247787612, "eval_accuracy": 0.8131036542865677, "eval_f1": 0.8098855464397853, "eval_loss": 0.33793944120407104, "eval_precision": 0.8397395505145978, "eval_recall": 0.7820813771517997, "eval_runtime": 53.8514, "eval_samples_per_second": 186.495, "eval_steps_per_second": 5.831, "step": 54000 }, { "epoch": 19.469026548672566, "grad_norm": 10.149681091308594, "learning_rate": 5.899705014749262e-08, "loss": 0.2928, "step": 55000 }, { "epoch": 19.469026548672566, "eval_accuracy": 0.8094194961664841, "eval_f1": 0.8097415506958251, "eval_loss": 0.3402244746685028, "eval_precision": 0.8231608730800324, "eval_recall": 0.7967527386541471, "eval_runtime": 53.8454, "eval_samples_per_second": 186.516, "eval_steps_per_second": 5.832, "step": 55000 }, { "epoch": 19.82300884955752, "grad_norm": 12.412276268005371, "learning_rate": 1.966568338249754e-08, "loss": 0.2894, "step": 56000 }, { "epoch": 19.82300884955752, "eval_accuracy": 0.8115105048292343, "eval_f1": 0.8119972191876055, "eval_loss": 0.3394980728626251, "eval_precision": 0.8246923542465201, "eval_recall": 0.7996870109546166, "eval_runtime": 53.757, "eval_samples_per_second": 186.822, "eval_steps_per_second": 5.841, "step": 56000 } ], "logging_steps": 1000, "max_steps": 56500, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.8793058646730336e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }