{ "best_metric": 0.9623383341955509, "best_model_checkpoint": ".//debugged_eu_bdt_ses_udpipe_16_0.01_0.00005_20_04-23-24_02-43/checkpoint-8550", "epoch": 20.0, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.166666666666667e-05, "loss": 2.3424, "step": 450 }, { "epoch": 1.0, "eval_accuracy": 0.8618712108798952, "eval_f1": 0.8046604387530177, "eval_loss": 0.7042863368988037, "eval_precision": 0.8160527996593571, "eval_recall": 0.7935817805383023, "eval_runtime": 5.6554, "eval_samples_per_second": 159.141, "eval_steps_per_second": 19.981, "step": 450 }, { "epoch": 2.0, "learning_rate": 4.787234042553192e-05, "loss": 0.5837, "step": 900 }, { "epoch": 2.0, "eval_accuracy": 0.9321645092577421, "eval_f1": 0.9056838826888141, "eval_loss": 0.37773051857948303, "eval_precision": 0.9082769390942218, "eval_recall": 0.9031055900621118, "eval_runtime": 4.8592, "eval_samples_per_second": 185.216, "eval_steps_per_second": 23.255, "step": 900 }, { "epoch": 3.0, "learning_rate": 4.5212765957446815e-05, "loss": 0.3375, "step": 1350 }, { "epoch": 3.0, "eval_accuracy": 0.9487956742585614, "eval_f1": 0.9285084763336616, "eval_loss": 0.293639600276947, "eval_precision": 0.9300031155883269, "eval_recall": 0.9270186335403726, "eval_runtime": 4.8723, "eval_samples_per_second": 184.719, "eval_steps_per_second": 23.192, "step": 1350 }, { "epoch": 4.0, "learning_rate": 4.2553191489361704e-05, "loss": 0.2443, "step": 1800 }, { "epoch": 4.0, "eval_accuracy": 0.9546944125839751, "eval_f1": 0.9368006630750103, "eval_loss": 0.2582251727581024, "eval_precision": 0.9375777685607631, "eval_recall": 0.9360248447204969, "eval_runtime": 4.875, "eval_samples_per_second": 184.617, "eval_steps_per_second": 23.18, "step": 1800 }, { "epoch": 5.0, "learning_rate": 3.9893617021276594e-05, "loss": 0.1709, "step": 2250 }, { "epoch": 5.0, "eval_accuracy": 0.9590365394068491, "eval_f1": 0.9431171786120591, "eval_loss": 0.23951154947280884, "eval_precision": 0.9422401322587312, "eval_recall": 0.9439958592132505, "eval_runtime": 4.8802, "eval_samples_per_second": 184.419, "eval_steps_per_second": 23.155, "step": 2250 }, { "epoch": 6.0, "learning_rate": 3.723404255319149e-05, "loss": 0.1282, "step": 2700 }, { "epoch": 6.0, "eval_accuracy": 0.9626413239390463, "eval_f1": 0.9479349963771866, "eval_loss": 0.2380053848028183, "eval_precision": 0.9478368867729249, "eval_recall": 0.9480331262939958, "eval_runtime": 4.8713, "eval_samples_per_second": 184.757, "eval_steps_per_second": 23.197, "step": 2700 }, { "epoch": 7.0, "learning_rate": 3.4574468085106386e-05, "loss": 0.0975, "step": 3150 }, { "epoch": 7.0, "eval_accuracy": 0.9656726200229395, "eval_f1": 0.9517748111352583, "eval_loss": 0.23213887214660645, "eval_precision": 0.9514794123732672, "eval_recall": 0.9520703933747412, "eval_runtime": 5.071, "eval_samples_per_second": 177.48, "eval_steps_per_second": 22.284, "step": 3150 }, { "epoch": 8.0, "learning_rate": 3.191489361702128e-05, "loss": 0.0721, "step": 3600 }, { "epoch": 8.0, "eval_accuracy": 0.9674750122890382, "eval_f1": 0.9542747656775931, "eval_loss": 0.2307664304971695, "eval_precision": 0.9547197181639209, "eval_recall": 0.9538302277432712, "eval_runtime": 4.872, "eval_samples_per_second": 184.728, "eval_steps_per_second": 23.194, "step": 3600 }, { "epoch": 9.0, "learning_rate": 2.925531914893617e-05, "loss": 0.0561, "step": 4050 }, { "epoch": 9.0, "eval_accuracy": 0.9686219891856463, "eval_f1": 0.9559272877932571, "eval_loss": 0.2352980375289917, "eval_precision": 0.9564721732822055, "eval_recall": 0.9553830227743271, "eval_runtime": 4.8623, "eval_samples_per_second": 185.097, "eval_steps_per_second": 23.24, "step": 4050 }, { "epoch": 10.0, "learning_rate": 2.6595744680851064e-05, "loss": 0.0395, "step": 4500 }, { "epoch": 10.0, "eval_accuracy": 0.9672292315254792, "eval_f1": 0.954816003312458, "eval_loss": 0.24887719750404358, "eval_precision": 0.9547665873098024, "eval_recall": 0.9548654244306418, "eval_runtime": 4.8696, "eval_samples_per_second": 184.821, "eval_steps_per_second": 23.205, "step": 4500 }, { "epoch": 11.0, "learning_rate": 2.393617021276596e-05, "loss": 0.0284, "step": 4950 }, { "epoch": 11.0, "eval_accuracy": 0.9688677699492053, "eval_f1": 0.9560450925638639, "eval_loss": 0.2504814863204956, "eval_precision": 0.955156023971895, "eval_recall": 0.9569358178053831, "eval_runtime": 5.8981, "eval_samples_per_second": 152.592, "eval_steps_per_second": 19.159, "step": 4950 }, { "epoch": 12.0, "learning_rate": 2.1276595744680852e-05, "loss": 0.0178, "step": 5400 }, { "epoch": 12.0, "eval_accuracy": 0.9695231853186957, "eval_f1": 0.9567693502459229, "eval_loss": 0.2542861998081207, "eval_precision": 0.9570170895908856, "eval_recall": 0.9565217391304348, "eval_runtime": 5.0143, "eval_samples_per_second": 179.488, "eval_steps_per_second": 22.536, "step": 5400 }, { "epoch": 13.0, "learning_rate": 1.8617021276595745e-05, "loss": 0.0119, "step": 5850 }, { "epoch": 13.0, "eval_accuracy": 0.9688677699492053, "eval_f1": 0.9562247749146229, "eval_loss": 0.2555958926677704, "eval_precision": 0.9559279950341403, "eval_recall": 0.9565217391304348, "eval_runtime": 5.0192, "eval_samples_per_second": 179.312, "eval_steps_per_second": 22.514, "step": 5850 }, { "epoch": 14.0, "learning_rate": 1.595744680851064e-05, "loss": 0.008, "step": 6300 }, { "epoch": 14.0, "eval_accuracy": 0.9707520891364902, "eval_f1": 0.9594783418723801, "eval_loss": 0.26319199800491333, "eval_precision": 0.9593294008072027, "eval_recall": 0.9596273291925466, "eval_runtime": 5.2921, "eval_samples_per_second": 170.065, "eval_steps_per_second": 21.353, "step": 6300 }, { "epoch": 15.0, "learning_rate": 1.3297872340425532e-05, "loss": 0.0068, "step": 6750 }, { "epoch": 15.0, "eval_accuracy": 0.9711617237424217, "eval_f1": 0.9603186921206478, "eval_loss": 0.260258287191391, "eval_precision": 0.9598717550935981, "eval_recall": 0.9607660455486542, "eval_runtime": 5.016, "eval_samples_per_second": 179.425, "eval_steps_per_second": 22.528, "step": 6750 }, { "epoch": 16.0, "learning_rate": 1.0638297872340426e-05, "loss": 0.0044, "step": 7200 }, { "epoch": 16.0, "eval_accuracy": 0.9702605276093724, "eval_f1": 0.9582665356570306, "eval_loss": 0.27104565501213074, "eval_precision": 0.9574248217422755, "eval_recall": 0.9591097308488613, "eval_runtime": 5.1972, "eval_samples_per_second": 173.172, "eval_steps_per_second": 21.743, "step": 7200 }, { "epoch": 17.0, "learning_rate": 7.97872340425532e-06, "loss": 0.0032, "step": 7650 }, { "epoch": 17.0, "eval_accuracy": 0.9718171391119121, "eval_f1": 0.9608736155677465, "eval_loss": 0.27112799882888794, "eval_precision": 0.9607741668391637, "eval_recall": 0.9609730848861283, "eval_runtime": 5.352, "eval_samples_per_second": 168.161, "eval_steps_per_second": 21.114, "step": 7650 }, { "epoch": 18.0, "learning_rate": 5.319148936170213e-06, "loss": 0.0024, "step": 8100 }, { "epoch": 18.0, "eval_accuracy": 0.9717352121907259, "eval_f1": 0.9607863424728401, "eval_loss": 0.27230679988861084, "eval_precision": 0.9602895553257498, "eval_recall": 0.9612836438923396, "eval_runtime": 5.124, "eval_samples_per_second": 175.645, "eval_steps_per_second": 22.053, "step": 8100 }, { "epoch": 19.0, "learning_rate": 2.6595744680851065e-06, "loss": 0.0019, "step": 8550 }, { "epoch": 19.0, "eval_accuracy": 0.9726364083237752, "eval_f1": 0.9623383341955509, "eval_loss": 0.27249038219451904, "eval_precision": 0.9618407445708377, "eval_recall": 0.9628364389233954, "eval_runtime": 5.0361, "eval_samples_per_second": 178.708, "eval_steps_per_second": 22.438, "step": 8550 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 0.0015, "step": 9000 }, { "epoch": 20.0, "eval_accuracy": 0.9720629198754711, "eval_f1": 0.9611961920529801, "eval_loss": 0.27408093214035034, "eval_precision": 0.9607985105502689, "eval_recall": 0.9615942028985507, "eval_runtime": 5.1327, "eval_samples_per_second": 175.347, "eval_steps_per_second": 22.016, "step": 9000 }, { "epoch": 20.0, "step": 9000, "total_flos": 1.1952384086166018e+16, "train_loss": 0.20792188170221118, "train_runtime": 4350.4019, "train_samples_per_second": 33.077, "train_steps_per_second": 2.069 } ], "max_steps": 9000, "num_train_epochs": 20, "total_flos": 1.1952384086166018e+16, "trial_name": null, "trial_params": null }