|
{ |
|
"best_metric": 0.9623383341955509, |
|
"best_model_checkpoint": ".//debugged_eu_bdt_ses_udpipe_16_0.01_0.00005_20_04-23-24_02-43/checkpoint-8550", |
|
"epoch": 20.0, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 2.3424, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8618712108798952, |
|
"eval_f1": 0.8046604387530177, |
|
"eval_loss": 0.7042863368988037, |
|
"eval_precision": 0.8160527996593571, |
|
"eval_recall": 0.7935817805383023, |
|
"eval_runtime": 5.6554, |
|
"eval_samples_per_second": 159.141, |
|
"eval_steps_per_second": 19.981, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.787234042553192e-05, |
|
"loss": 0.5837, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9321645092577421, |
|
"eval_f1": 0.9056838826888141, |
|
"eval_loss": 0.37773051857948303, |
|
"eval_precision": 0.9082769390942218, |
|
"eval_recall": 0.9031055900621118, |
|
"eval_runtime": 4.8592, |
|
"eval_samples_per_second": 185.216, |
|
"eval_steps_per_second": 23.255, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.5212765957446815e-05, |
|
"loss": 0.3375, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9487956742585614, |
|
"eval_f1": 0.9285084763336616, |
|
"eval_loss": 0.293639600276947, |
|
"eval_precision": 0.9300031155883269, |
|
"eval_recall": 0.9270186335403726, |
|
"eval_runtime": 4.8723, |
|
"eval_samples_per_second": 184.719, |
|
"eval_steps_per_second": 23.192, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.2553191489361704e-05, |
|
"loss": 0.2443, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9546944125839751, |
|
"eval_f1": 0.9368006630750103, |
|
"eval_loss": 0.2582251727581024, |
|
"eval_precision": 0.9375777685607631, |
|
"eval_recall": 0.9360248447204969, |
|
"eval_runtime": 4.875, |
|
"eval_samples_per_second": 184.617, |
|
"eval_steps_per_second": 23.18, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.9893617021276594e-05, |
|
"loss": 0.1709, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9590365394068491, |
|
"eval_f1": 0.9431171786120591, |
|
"eval_loss": 0.23951154947280884, |
|
"eval_precision": 0.9422401322587312, |
|
"eval_recall": 0.9439958592132505, |
|
"eval_runtime": 4.8802, |
|
"eval_samples_per_second": 184.419, |
|
"eval_steps_per_second": 23.155, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.723404255319149e-05, |
|
"loss": 0.1282, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9626413239390463, |
|
"eval_f1": 0.9479349963771866, |
|
"eval_loss": 0.2380053848028183, |
|
"eval_precision": 0.9478368867729249, |
|
"eval_recall": 0.9480331262939958, |
|
"eval_runtime": 4.8713, |
|
"eval_samples_per_second": 184.757, |
|
"eval_steps_per_second": 23.197, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.4574468085106386e-05, |
|
"loss": 0.0975, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9656726200229395, |
|
"eval_f1": 0.9517748111352583, |
|
"eval_loss": 0.23213887214660645, |
|
"eval_precision": 0.9514794123732672, |
|
"eval_recall": 0.9520703933747412, |
|
"eval_runtime": 5.071, |
|
"eval_samples_per_second": 177.48, |
|
"eval_steps_per_second": 22.284, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.191489361702128e-05, |
|
"loss": 0.0721, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9674750122890382, |
|
"eval_f1": 0.9542747656775931, |
|
"eval_loss": 0.2307664304971695, |
|
"eval_precision": 0.9547197181639209, |
|
"eval_recall": 0.9538302277432712, |
|
"eval_runtime": 4.872, |
|
"eval_samples_per_second": 184.728, |
|
"eval_steps_per_second": 23.194, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.925531914893617e-05, |
|
"loss": 0.0561, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9686219891856463, |
|
"eval_f1": 0.9559272877932571, |
|
"eval_loss": 0.2352980375289917, |
|
"eval_precision": 0.9564721732822055, |
|
"eval_recall": 0.9553830227743271, |
|
"eval_runtime": 4.8623, |
|
"eval_samples_per_second": 185.097, |
|
"eval_steps_per_second": 23.24, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.6595744680851064e-05, |
|
"loss": 0.0395, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9672292315254792, |
|
"eval_f1": 0.954816003312458, |
|
"eval_loss": 0.24887719750404358, |
|
"eval_precision": 0.9547665873098024, |
|
"eval_recall": 0.9548654244306418, |
|
"eval_runtime": 4.8696, |
|
"eval_samples_per_second": 184.821, |
|
"eval_steps_per_second": 23.205, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.393617021276596e-05, |
|
"loss": 0.0284, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9688677699492053, |
|
"eval_f1": 0.9560450925638639, |
|
"eval_loss": 0.2504814863204956, |
|
"eval_precision": 0.955156023971895, |
|
"eval_recall": 0.9569358178053831, |
|
"eval_runtime": 5.8981, |
|
"eval_samples_per_second": 152.592, |
|
"eval_steps_per_second": 19.159, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.1276595744680852e-05, |
|
"loss": 0.0178, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9695231853186957, |
|
"eval_f1": 0.9567693502459229, |
|
"eval_loss": 0.2542861998081207, |
|
"eval_precision": 0.9570170895908856, |
|
"eval_recall": 0.9565217391304348, |
|
"eval_runtime": 5.0143, |
|
"eval_samples_per_second": 179.488, |
|
"eval_steps_per_second": 22.536, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.8617021276595745e-05, |
|
"loss": 0.0119, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9688677699492053, |
|
"eval_f1": 0.9562247749146229, |
|
"eval_loss": 0.2555958926677704, |
|
"eval_precision": 0.9559279950341403, |
|
"eval_recall": 0.9565217391304348, |
|
"eval_runtime": 5.0192, |
|
"eval_samples_per_second": 179.312, |
|
"eval_steps_per_second": 22.514, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 1.595744680851064e-05, |
|
"loss": 0.008, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9707520891364902, |
|
"eval_f1": 0.9594783418723801, |
|
"eval_loss": 0.26319199800491333, |
|
"eval_precision": 0.9593294008072027, |
|
"eval_recall": 0.9596273291925466, |
|
"eval_runtime": 5.2921, |
|
"eval_samples_per_second": 170.065, |
|
"eval_steps_per_second": 21.353, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.3297872340425532e-05, |
|
"loss": 0.0068, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9711617237424217, |
|
"eval_f1": 0.9603186921206478, |
|
"eval_loss": 0.260258287191391, |
|
"eval_precision": 0.9598717550935981, |
|
"eval_recall": 0.9607660455486542, |
|
"eval_runtime": 5.016, |
|
"eval_samples_per_second": 179.425, |
|
"eval_steps_per_second": 22.528, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 1.0638297872340426e-05, |
|
"loss": 0.0044, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9702605276093724, |
|
"eval_f1": 0.9582665356570306, |
|
"eval_loss": 0.27104565501213074, |
|
"eval_precision": 0.9574248217422755, |
|
"eval_recall": 0.9591097308488613, |
|
"eval_runtime": 5.1972, |
|
"eval_samples_per_second": 173.172, |
|
"eval_steps_per_second": 21.743, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 7.97872340425532e-06, |
|
"loss": 0.0032, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9718171391119121, |
|
"eval_f1": 0.9608736155677465, |
|
"eval_loss": 0.27112799882888794, |
|
"eval_precision": 0.9607741668391637, |
|
"eval_recall": 0.9609730848861283, |
|
"eval_runtime": 5.352, |
|
"eval_samples_per_second": 168.161, |
|
"eval_steps_per_second": 21.114, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 5.319148936170213e-06, |
|
"loss": 0.0024, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9717352121907259, |
|
"eval_f1": 0.9607863424728401, |
|
"eval_loss": 0.27230679988861084, |
|
"eval_precision": 0.9602895553257498, |
|
"eval_recall": 0.9612836438923396, |
|
"eval_runtime": 5.124, |
|
"eval_samples_per_second": 175.645, |
|
"eval_steps_per_second": 22.053, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"loss": 0.0019, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9726364083237752, |
|
"eval_f1": 0.9623383341955509, |
|
"eval_loss": 0.27249038219451904, |
|
"eval_precision": 0.9618407445708377, |
|
"eval_recall": 0.9628364389233954, |
|
"eval_runtime": 5.0361, |
|
"eval_samples_per_second": 178.708, |
|
"eval_steps_per_second": 22.438, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0015, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9720629198754711, |
|
"eval_f1": 0.9611961920529801, |
|
"eval_loss": 0.27408093214035034, |
|
"eval_precision": 0.9607985105502689, |
|
"eval_recall": 0.9615942028985507, |
|
"eval_runtime": 5.1327, |
|
"eval_samples_per_second": 175.347, |
|
"eval_steps_per_second": 22.016, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 9000, |
|
"total_flos": 1.1952384086166018e+16, |
|
"train_loss": 0.20792188170221118, |
|
"train_runtime": 4350.4019, |
|
"train_samples_per_second": 33.077, |
|
"train_steps_per_second": 2.069 |
|
} |
|
], |
|
"max_steps": 9000, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.1952384086166018e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|