|
{ |
|
"best_metric": 0.8199536621312183, |
|
"best_model_checkpoint": "/home/bram/shares/predict/trained/dutch/hebban-reviews/bert-base-dutch-cased/checkpoint-11000", |
|
"epoch": 3.9447731755424065, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.792083333333334e-05, |
|
"loss": 0.7371, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.7546844181459567, |
|
"eval_f1": 0.7629915603619231, |
|
"eval_loss": 0.6527217626571655, |
|
"eval_precision": 0.7780177612862748, |
|
"eval_recall": 0.7546844181459567, |
|
"eval_runtime": 24.4866, |
|
"eval_samples_per_second": 662.566, |
|
"eval_steps_per_second": 27.607, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.58375e-05, |
|
"loss": 0.6457, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.7643614398422091, |
|
"eval_f1": 0.7769010779814548, |
|
"eval_loss": 0.6216204166412354, |
|
"eval_precision": 0.8017754901402682, |
|
"eval_recall": 0.7643614398422091, |
|
"eval_runtime": 24.6912, |
|
"eval_samples_per_second": 657.077, |
|
"eval_steps_per_second": 27.378, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.375416666666667e-05, |
|
"loss": 0.6184, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.7790310650887574, |
|
"eval_f1": 0.789661025567087, |
|
"eval_loss": 0.6005069017410278, |
|
"eval_precision": 0.8094066060995802, |
|
"eval_recall": 0.7790310650887574, |
|
"eval_runtime": 24.5988, |
|
"eval_samples_per_second": 659.544, |
|
"eval_steps_per_second": 27.481, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.1670833333333334e-05, |
|
"loss": 0.6042, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.8077539447731755, |
|
"eval_f1": 0.8118714417209679, |
|
"eval_loss": 0.5989466309547424, |
|
"eval_precision": 0.8220143498287384, |
|
"eval_recall": 0.8077539447731755, |
|
"eval_runtime": 24.5976, |
|
"eval_samples_per_second": 659.577, |
|
"eval_steps_per_second": 27.482, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.959166666666667e-05, |
|
"loss": 0.5744, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.7972140039447732, |
|
"eval_f1": 0.8043419281165946, |
|
"eval_loss": 0.5735336542129517, |
|
"eval_precision": 0.8156333669948874, |
|
"eval_recall": 0.7972140039447732, |
|
"eval_runtime": 24.5948, |
|
"eval_samples_per_second": 659.651, |
|
"eval_steps_per_second": 27.485, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7512500000000004e-05, |
|
"loss": 0.5833, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.8060281065088757, |
|
"eval_f1": 0.8074379985793598, |
|
"eval_loss": 0.5610195994377136, |
|
"eval_precision": 0.8123421315309852, |
|
"eval_recall": 0.8060281065088757, |
|
"eval_runtime": 24.5935, |
|
"eval_samples_per_second": 659.686, |
|
"eval_steps_per_second": 27.487, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.542916666666667e-05, |
|
"loss": 0.4633, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.8055966469428008, |
|
"eval_f1": 0.8112577676458685, |
|
"eval_loss": 0.5967000126838684, |
|
"eval_precision": 0.8196531984723394, |
|
"eval_recall": 0.8055966469428008, |
|
"eval_runtime": 24.7636, |
|
"eval_samples_per_second": 655.155, |
|
"eval_steps_per_second": 27.298, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.3345833333333335e-05, |
|
"loss": 0.4473, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.8108357988165681, |
|
"eval_f1": 0.8144157036046853, |
|
"eval_loss": 0.6390397548675537, |
|
"eval_precision": 0.8190905659244092, |
|
"eval_recall": 0.8108357988165681, |
|
"eval_runtime": 24.7631, |
|
"eval_samples_per_second": 655.169, |
|
"eval_steps_per_second": 27.299, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.12625e-05, |
|
"loss": 0.4493, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.7642998027613412, |
|
"eval_f1": 0.780188565362406, |
|
"eval_loss": 0.6074336171150208, |
|
"eval_precision": 0.818173356753982, |
|
"eval_recall": 0.7642998027613412, |
|
"eval_runtime": 24.6961, |
|
"eval_samples_per_second": 656.945, |
|
"eval_steps_per_second": 27.373, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.917916666666667e-05, |
|
"loss": 0.4402, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.817430966469428, |
|
"eval_f1": 0.818650395547064, |
|
"eval_loss": 0.6175166368484497, |
|
"eval_precision": 0.8200081119899918, |
|
"eval_recall": 0.817430966469428, |
|
"eval_runtime": 24.5978, |
|
"eval_samples_per_second": 659.57, |
|
"eval_steps_per_second": 27.482, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.7100000000000005e-05, |
|
"loss": 0.4406, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.8167529585798816, |
|
"eval_f1": 0.8194587497642177, |
|
"eval_loss": 0.6328938603401184, |
|
"eval_precision": 0.8229261899048395, |
|
"eval_recall": 0.8167529585798816, |
|
"eval_runtime": 24.611, |
|
"eval_samples_per_second": 659.218, |
|
"eval_steps_per_second": 27.467, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.5016666666666667e-05, |
|
"loss": 0.4457, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.8178007889546351, |
|
"eval_f1": 0.8196437981366307, |
|
"eval_loss": 0.6221566796302795, |
|
"eval_precision": 0.8219923397562409, |
|
"eval_recall": 0.8178007889546351, |
|
"eval_runtime": 24.6955, |
|
"eval_samples_per_second": 656.961, |
|
"eval_steps_per_second": 27.373, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.2933333333333333e-05, |
|
"loss": 0.3013, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.8099112426035503, |
|
"eval_f1": 0.8154916160650456, |
|
"eval_loss": 0.7676593661308289, |
|
"eval_precision": 0.8240038245839082, |
|
"eval_recall": 0.8099112426035503, |
|
"eval_runtime": 24.7339, |
|
"eval_samples_per_second": 655.941, |
|
"eval_steps_per_second": 27.331, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.0854166666666668e-05, |
|
"loss": 0.2875, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_accuracy": 0.8025764299802761, |
|
"eval_f1": 0.8097813881432507, |
|
"eval_loss": 0.7549890279769897, |
|
"eval_precision": 0.8215043597038734, |
|
"eval_recall": 0.8025764299802761, |
|
"eval_runtime": 24.7599, |
|
"eval_samples_per_second": 655.252, |
|
"eval_steps_per_second": 27.302, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.8770833333333333e-05, |
|
"loss": 0.2804, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.8102194280078896, |
|
"eval_f1": 0.8148870229811181, |
|
"eval_loss": 0.804236114025116, |
|
"eval_precision": 0.8221239914552869, |
|
"eval_recall": 0.8102194280078896, |
|
"eval_runtime": 24.6532, |
|
"eval_samples_per_second": 658.09, |
|
"eval_steps_per_second": 27.42, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.66875e-05, |
|
"loss": 0.2784, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_accuracy": 0.8136711045364892, |
|
"eval_f1": 0.8173932526970648, |
|
"eval_loss": 0.8103494644165039, |
|
"eval_precision": 0.8223803033268108, |
|
"eval_recall": 0.8136711045364892, |
|
"eval_runtime": 24.5862, |
|
"eval_samples_per_second": 659.883, |
|
"eval_steps_per_second": 27.495, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4604166666666666e-05, |
|
"loss": 0.275, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.8132396449704142, |
|
"eval_f1": 0.8172151251977423, |
|
"eval_loss": 0.8132022619247437, |
|
"eval_precision": 0.8227088066747076, |
|
"eval_recall": 0.8132396449704142, |
|
"eval_runtime": 24.5279, |
|
"eval_samples_per_second": 661.452, |
|
"eval_steps_per_second": 27.56, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.2520833333333334e-05, |
|
"loss": 0.2651, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.8065212031558185, |
|
"eval_f1": 0.8120431442321697, |
|
"eval_loss": 0.7826108336448669, |
|
"eval_precision": 0.8202960014382004, |
|
"eval_recall": 0.8065212031558185, |
|
"eval_runtime": 24.7019, |
|
"eval_samples_per_second": 656.791, |
|
"eval_steps_per_second": 27.366, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.0441666666666667e-05, |
|
"loss": 0.1855, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.8140409270216963, |
|
"eval_f1": 0.8173959290765784, |
|
"eval_loss": 1.0370513200759888, |
|
"eval_precision": 0.8233538681349268, |
|
"eval_recall": 0.8140409270216963, |
|
"eval_runtime": 24.6749, |
|
"eval_samples_per_second": 657.51, |
|
"eval_steps_per_second": 27.396, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 8.358333333333333e-06, |
|
"loss": 0.1571, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_accuracy": 0.8128081854043393, |
|
"eval_f1": 0.8173020798805183, |
|
"eval_loss": 1.036438226699829, |
|
"eval_precision": 0.8246320290318612, |
|
"eval_recall": 0.8128081854043393, |
|
"eval_runtime": 24.6314, |
|
"eval_samples_per_second": 658.672, |
|
"eval_steps_per_second": 27.445, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.275e-06, |
|
"loss": 0.1608, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.8108357988165681, |
|
"eval_f1": 0.8154023680396078, |
|
"eval_loss": 1.0521161556243896, |
|
"eval_precision": 0.8221458981635473, |
|
"eval_recall": 0.8108357988165681, |
|
"eval_runtime": 24.6401, |
|
"eval_samples_per_second": 658.44, |
|
"eval_steps_per_second": 27.435, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.191666666666667e-06, |
|
"loss": 0.1542, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_accuracy": 0.8197115384615384, |
|
"eval_f1": 0.8199536621312183, |
|
"eval_loss": 1.108322262763977, |
|
"eval_precision": 0.8205846609719522, |
|
"eval_recall": 0.8197115384615384, |
|
"eval_runtime": 24.5935, |
|
"eval_samples_per_second": 659.687, |
|
"eval_steps_per_second": 27.487, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.108333333333333e-06, |
|
"loss": 0.1561, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_accuracy": 0.8138560157790927, |
|
"eval_f1": 0.8166825272884544, |
|
"eval_loss": 1.0635404586791992, |
|
"eval_precision": 0.8205938348020483, |
|
"eval_recall": 0.8138560157790927, |
|
"eval_runtime": 24.8393, |
|
"eval_samples_per_second": 653.159, |
|
"eval_steps_per_second": 27.215, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 2.916666666666667e-08, |
|
"loss": 0.1415, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.815396942800789, |
|
"eval_f1": 0.8176978993348879, |
|
"eval_loss": 1.0898783206939697, |
|
"eval_precision": 0.8208248888308868, |
|
"eval_recall": 0.815396942800789, |
|
"eval_runtime": 24.6997, |
|
"eval_samples_per_second": 656.85, |
|
"eval_steps_per_second": 27.369, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"step": 12000, |
|
"total_flos": 1.4832731619013427e+17, |
|
"train_loss": 0.37884856541951495, |
|
"train_runtime": 3446.9333, |
|
"train_samples_per_second": 167.105, |
|
"train_steps_per_second": 3.481 |
|
} |
|
], |
|
"max_steps": 12000, |
|
"num_train_epochs": 4, |
|
"total_flos": 1.4832731619013427e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|