|
{ |
|
"best_metric": 0.826051364529945, |
|
"best_model_checkpoint": "/home/bram/shares/predict/trained/dutch/hebban-reviews/robbert-v2-dutch-base/checkpoint-2500", |
|
"epoch": 3.9447731755424065, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7925000000000006e-05, |
|
"loss": 0.7262, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.7467948717948718, |
|
"eval_f1": 0.7588505397999891, |
|
"eval_loss": 0.6503846049308777, |
|
"eval_precision": 0.7826608700318252, |
|
"eval_recall": 0.7467948717948718, |
|
"eval_runtime": 24.2662, |
|
"eval_samples_per_second": 668.584, |
|
"eval_steps_per_second": 27.858, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.5841666666666665e-05, |
|
"loss": 0.6388, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.7707716962524654, |
|
"eval_f1": 0.7828012503260621, |
|
"eval_loss": 0.6046772599220276, |
|
"eval_precision": 0.806817293633085, |
|
"eval_recall": 0.7707716962524654, |
|
"eval_runtime": 24.3206, |
|
"eval_samples_per_second": 667.089, |
|
"eval_steps_per_second": 27.795, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.375833333333334e-05, |
|
"loss": 0.6036, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.7991247534516766, |
|
"eval_f1": 0.8068009793163453, |
|
"eval_loss": 0.5817954540252686, |
|
"eval_precision": 0.8200694446499367, |
|
"eval_recall": 0.7991247534516766, |
|
"eval_runtime": 24.3415, |
|
"eval_samples_per_second": 666.516, |
|
"eval_steps_per_second": 27.771, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.1675e-05, |
|
"loss": 0.5894, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.8214373767258383, |
|
"eval_f1": 0.8223555383757819, |
|
"eval_loss": 0.5838125944137573, |
|
"eval_precision": 0.8240754296334577, |
|
"eval_recall": 0.8214373767258383, |
|
"eval_runtime": 24.3264, |
|
"eval_samples_per_second": 666.929, |
|
"eval_steps_per_second": 27.789, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.960000000000001e-05, |
|
"loss": 0.5678, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.8257519723865878, |
|
"eval_f1": 0.826051364529945, |
|
"eval_loss": 0.565325915813446, |
|
"eval_precision": 0.8264063729685338, |
|
"eval_recall": 0.8257519723865878, |
|
"eval_runtime": 24.3378, |
|
"eval_samples_per_second": 666.617, |
|
"eval_steps_per_second": 27.776, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7516666666666666e-05, |
|
"loss": 0.5821, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.8164447731755424, |
|
"eval_f1": 0.8171577310703354, |
|
"eval_loss": 0.5594866275787354, |
|
"eval_precision": 0.8191229796098939, |
|
"eval_recall": 0.8164447731755424, |
|
"eval_runtime": 24.3358, |
|
"eval_samples_per_second": 666.673, |
|
"eval_steps_per_second": 27.778, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.543333333333333e-05, |
|
"loss": 0.4741, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.7975838264299803, |
|
"eval_f1": 0.8063837559416328, |
|
"eval_loss": 0.5798861384391785, |
|
"eval_precision": 0.8218943463788534, |
|
"eval_recall": 0.7975838264299803, |
|
"eval_runtime": 24.3305, |
|
"eval_samples_per_second": 666.816, |
|
"eval_steps_per_second": 27.784, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.3350000000000004e-05, |
|
"loss": 0.4763, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.8000493096646942, |
|
"eval_f1": 0.8080426775793622, |
|
"eval_loss": 0.5934886336326599, |
|
"eval_precision": 0.8221336258804294, |
|
"eval_recall": 0.8000493096646942, |
|
"eval_runtime": 24.3209, |
|
"eval_samples_per_second": 667.08, |
|
"eval_steps_per_second": 27.795, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.126666666666666e-05, |
|
"loss": 0.4719, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.7595537475345168, |
|
"eval_f1": 0.7769614913537937, |
|
"eval_loss": 0.5927842855453491, |
|
"eval_precision": 0.8215588992651691, |
|
"eval_recall": 0.7595537475345168, |
|
"eval_runtime": 24.3097, |
|
"eval_samples_per_second": 667.388, |
|
"eval_steps_per_second": 27.808, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.91875e-05, |
|
"loss": 0.477, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.8143491124260355, |
|
"eval_f1": 0.8178014358117583, |
|
"eval_loss": 0.571167528629303, |
|
"eval_precision": 0.8224283181214769, |
|
"eval_recall": 0.8143491124260355, |
|
"eval_runtime": 24.3019, |
|
"eval_samples_per_second": 667.601, |
|
"eval_steps_per_second": 27.817, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.710416666666667e-05, |
|
"loss": 0.4627, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.8163831360946746, |
|
"eval_f1": 0.8209409596272614, |
|
"eval_loss": 0.5852541327476501, |
|
"eval_precision": 0.8276030279019199, |
|
"eval_recall": 0.8163831360946746, |
|
"eval_runtime": 24.3288, |
|
"eval_samples_per_second": 666.863, |
|
"eval_steps_per_second": 27.786, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.5020833333333333e-05, |
|
"loss": 0.4669, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.823594674556213, |
|
"eval_f1": 0.8244437619387351, |
|
"eval_loss": 0.5960806608200073, |
|
"eval_precision": 0.8258010274342865, |
|
"eval_recall": 0.823594674556213, |
|
"eval_runtime": 24.228, |
|
"eval_samples_per_second": 669.639, |
|
"eval_steps_per_second": 27.902, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.2937500000000002e-05, |
|
"loss": 0.3604, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.8014669625246549, |
|
"eval_f1": 0.8102801213854947, |
|
"eval_loss": 0.6862322092056274, |
|
"eval_precision": 0.8275335401956307, |
|
"eval_recall": 0.8014669625246549, |
|
"eval_runtime": 24.2792, |
|
"eval_samples_per_second": 668.227, |
|
"eval_steps_per_second": 27.843, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.0854166666666668e-05, |
|
"loss": 0.3489, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_accuracy": 0.7792776134122288, |
|
"eval_f1": 0.7927818598604747, |
|
"eval_loss": 0.6760165095329285, |
|
"eval_precision": 0.822269294544987, |
|
"eval_recall": 0.7792776134122288, |
|
"eval_runtime": 24.2892, |
|
"eval_samples_per_second": 667.95, |
|
"eval_steps_per_second": 27.831, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.8775000000000002e-05, |
|
"loss": 0.3394, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.8185404339250493, |
|
"eval_f1": 0.8224920113739561, |
|
"eval_loss": 0.6916875839233398, |
|
"eval_precision": 0.8283683902286886, |
|
"eval_recall": 0.8185404339250493, |
|
"eval_runtime": 24.2568, |
|
"eval_samples_per_second": 668.844, |
|
"eval_steps_per_second": 27.868, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.6695833333333334e-05, |
|
"loss": 0.3462, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_accuracy": 0.8147805719921104, |
|
"eval_f1": 0.8200537266341018, |
|
"eval_loss": 0.6634742617607117, |
|
"eval_precision": 0.8279502059904518, |
|
"eval_recall": 0.8147805719921104, |
|
"eval_runtime": 24.237, |
|
"eval_samples_per_second": 669.39, |
|
"eval_steps_per_second": 27.891, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4612500000000001e-05, |
|
"loss": 0.3498, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.8173076923076923, |
|
"eval_f1": 0.8219188790015944, |
|
"eval_loss": 0.6741124987602234, |
|
"eval_precision": 0.8291747460835226, |
|
"eval_recall": 0.8173076923076923, |
|
"eval_runtime": 24.3305, |
|
"eval_samples_per_second": 666.817, |
|
"eval_steps_per_second": 27.784, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.2529166666666667e-05, |
|
"loss": 0.335, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.8091715976331361, |
|
"eval_f1": 0.8160105785088146, |
|
"eval_loss": 0.6432847380638123, |
|
"eval_precision": 0.8271351948602929, |
|
"eval_recall": 0.8091715976331361, |
|
"eval_runtime": 24.2377, |
|
"eval_samples_per_second": 669.372, |
|
"eval_steps_per_second": 27.89, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.0445833333333334e-05, |
|
"loss": 0.2724, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.8155818540433925, |
|
"eval_f1": 0.8200452160739635, |
|
"eval_loss": 0.778809666633606, |
|
"eval_precision": 0.8263415739712193, |
|
"eval_recall": 0.8155818540433925, |
|
"eval_runtime": 24.3292, |
|
"eval_samples_per_second": 666.853, |
|
"eval_steps_per_second": 27.786, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 8.362500000000002e-06, |
|
"loss": 0.2404, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_accuracy": 0.8102194280078896, |
|
"eval_f1": 0.8165639264989629, |
|
"eval_loss": 0.7925969958305359, |
|
"eval_precision": 0.8272230495650681, |
|
"eval_recall": 0.8102194280078896, |
|
"eval_runtime": 24.3267, |
|
"eval_samples_per_second": 666.921, |
|
"eval_steps_per_second": 27.788, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.2791666666666665e-06, |
|
"loss": 0.2417, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.811698717948718, |
|
"eval_f1": 0.8176421251758528, |
|
"eval_loss": 0.8324716687202454, |
|
"eval_precision": 0.8284127391855651, |
|
"eval_recall": 0.811698717948718, |
|
"eval_runtime": 24.2309, |
|
"eval_samples_per_second": 669.559, |
|
"eval_steps_per_second": 27.898, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 0.2426, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_accuracy": 0.8218071992110454, |
|
"eval_f1": 0.8237488575881482, |
|
"eval_loss": 0.835399866104126, |
|
"eval_precision": 0.8261715829036445, |
|
"eval_recall": 0.8218071992110454, |
|
"eval_runtime": 24.2418, |
|
"eval_samples_per_second": 669.257, |
|
"eval_steps_per_second": 27.886, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.1166666666666666e-06, |
|
"loss": 0.2399, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_accuracy": 0.8130547337278107, |
|
"eval_f1": 0.8181997138713092, |
|
"eval_loss": 0.8061773777008057, |
|
"eval_precision": 0.8260171371000359, |
|
"eval_recall": 0.8130547337278107, |
|
"eval_runtime": 24.2269, |
|
"eval_samples_per_second": 669.67, |
|
"eval_steps_per_second": 27.903, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"loss": 0.2292, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.8134245562130178, |
|
"eval_f1": 0.8183934985407206, |
|
"eval_loss": 0.8132809996604919, |
|
"eval_precision": 0.8258917495347268, |
|
"eval_recall": 0.8134245562130178, |
|
"eval_runtime": 24.2378, |
|
"eval_samples_per_second": 669.369, |
|
"eval_steps_per_second": 27.89, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"step": 12000, |
|
"total_flos": 1.481337892587438e+17, |
|
"train_loss": 0.42010608355204265, |
|
"train_runtime": 3460.7395, |
|
"train_samples_per_second": 166.438, |
|
"train_steps_per_second": 3.467 |
|
} |
|
], |
|
"max_steps": 12000, |
|
"num_train_epochs": 4, |
|
"total_flos": 1.481337892587438e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|