{
  "best_metric": 43.86838413539317,
  "best_model_checkpoint": "results/subtask2-roberta-large-3e-5-32-10-evallabels/checkpoint-950",
  "epoch": 10.0,
  "global_step": 1890,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.26,
      "learning_rate": 7.936507936507936e-06,
      "loss": 1.4415,
      "step": 50
    },
    {
      "epoch": 0.26,
      "eval_accuracy_score": 76.77304173175631,
      "eval_f1": 0.29239766081871343,
      "eval_loss": 0.6649255752563477,
      "eval_precision": 1.2345679012345678,
      "eval_recall": 1.355421686746988,
      "eval_runtime": 8.6446,
      "eval_samples_per_second": 136.386,
      "step": 50
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.5873015873015872e-05,
      "loss": 0.6015,
      "step": 100
    },
    {
      "epoch": 0.53,
      "eval_accuracy_score": 85.00781075652756,
      "eval_f1": 7.7752154879329645,
      "eval_loss": 0.44424399733543396,
      "eval_precision": 23.591549295774648,
      "eval_recall": 20.180722891566266,
      "eval_runtime": 8.8048,
      "eval_samples_per_second": 133.905,
      "step": 100
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.4787,
      "step": 150
    },
    {
      "epoch": 0.79,
      "eval_accuracy_score": 84.93639812541844,
      "eval_f1": 13.33891292959303,
      "eval_loss": 0.42927324771881104,
      "eval_precision": 30.36036036036036,
      "eval_recall": 50.75301204819277,
      "eval_runtime": 8.7806,
      "eval_samples_per_second": 134.274,
      "step": 150
    },
    {
      "epoch": 1.06,
      "learning_rate": 2.980599647266314e-05,
      "loss": 0.4004,
      "step": 200
    },
    {
      "epoch": 1.06,
      "eval_accuracy_score": 86.96273153313993,
      "eval_f1": 22.702533225449525,
      "eval_loss": 0.3721863925457001,
      "eval_precision": 43.04812834224599,
      "eval_recall": 48.493975903614455,
      "eval_runtime": 8.7281,
      "eval_samples_per_second": 135.081,
      "step": 200
    },
    {
      "epoch": 1.32,
      "learning_rate": 2.8924162257495593e-05,
      "loss": 0.3509,
      "step": 250
    },
    {
      "epoch": 1.32,
      "eval_accuracy_score": 85.72193706761884,
      "eval_f1": 30.029422978055926,
      "eval_loss": 0.3999338746070862,
      "eval_precision": 38.4,
      "eval_recall": 57.831325301204814,
      "eval_runtime": 8.8031,
      "eval_samples_per_second": 133.929,
      "step": 250
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.8042328042328043e-05,
      "loss": 0.3751,
      "step": 300
    },
    {
      "epoch": 1.59,
      "eval_accuracy_score": 87.88216915866994,
      "eval_f1": 36.06508408748763,
      "eval_loss": 0.33605942130088806,
      "eval_precision": 48.492791612057665,
      "eval_recall": 55.72289156626506,
      "eval_runtime": 8.8234,
      "eval_samples_per_second": 133.622,
      "step": 300
    },
    {
      "epoch": 1.85,
      "learning_rate": 2.7160493827160493e-05,
      "loss": 0.32,
      "step": 350
    },
    {
      "epoch": 1.85,
      "eval_accuracy_score": 87.23499218924347,
      "eval_f1": 38.96036192862586,
      "eval_loss": 0.3504337668418884,
      "eval_precision": 45.92178770949721,
      "eval_recall": 61.897590361445786,
      "eval_runtime": 8.8119,
      "eval_samples_per_second": 133.797,
      "step": 350
    },
    {
      "epoch": 2.12,
      "learning_rate": 2.6278659611992947e-05,
      "loss": 0.338,
      "step": 400
    },
    {
      "epoch": 2.12,
      "eval_accuracy_score": 87.51617942423566,
      "eval_f1": 37.19003385850202,
      "eval_loss": 0.3751635253429413,
      "eval_precision": 47.30473047304731,
      "eval_recall": 64.7590361445783,
      "eval_runtime": 8.8298,
      "eval_samples_per_second": 133.526,
      "step": 400
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.5396825396825397e-05,
      "loss": 0.2625,
      "step": 450
    },
    {
      "epoch": 2.38,
      "eval_accuracy_score": 87.68578442311984,
      "eval_f1": 33.11482983162181,
      "eval_loss": 0.3540908396244049,
      "eval_precision": 51.25944584382871,
      "eval_recall": 61.295180722891565,
      "eval_runtime": 8.6563,
      "eval_samples_per_second": 136.201,
      "step": 450
    },
    {
      "epoch": 2.65,
      "learning_rate": 2.4514991181657847e-05,
      "loss": 0.2718,
      "step": 500
    },
    {
      "epoch": 2.65,
      "eval_accuracy_score": 88.29279178754742,
      "eval_f1": 32.55245599678572,
      "eval_loss": 0.3684297502040863,
      "eval_precision": 51.84243964421855,
      "eval_recall": 61.44578313253012,
      "eval_runtime": 8.9791,
      "eval_samples_per_second": 131.305,
      "step": 500
    },
    {
      "epoch": 2.91,
      "learning_rate": 2.36331569664903e-05,
      "loss": 0.2708,
      "step": 550
    },
    {
      "epoch": 2.91,
      "eval_accuracy_score": 86.14594956482928,
      "eval_f1": 38.566294416176056,
      "eval_loss": 0.3753862679004669,
      "eval_precision": 51.88571428571429,
      "eval_recall": 68.37349397590361,
      "eval_runtime": 8.8041,
      "eval_samples_per_second": 133.916,
      "step": 550
    },
    {
      "epoch": 3.17,
      "learning_rate": 2.275132275132275e-05,
      "loss": 0.2305,
      "step": 600
    },
    {
      "epoch": 3.17,
      "eval_accuracy_score": 88.5918321803169,
      "eval_f1": 39.92203233980172,
      "eval_loss": 0.3945120573043823,
      "eval_precision": 52.08845208845209,
      "eval_recall": 63.85542168674698,
      "eval_runtime": 8.8109,
      "eval_samples_per_second": 133.812,
      "step": 600
    },
    {
      "epoch": 3.44,
      "learning_rate": 2.1869488536155205e-05,
      "loss": 0.2122,
      "step": 650
    },
    {
      "epoch": 3.44,
      "eval_accuracy_score": 88.7034144164249,
      "eval_f1": 37.88744391080894,
      "eval_loss": 0.35788846015930176,
      "eval_precision": 53.2319391634981,
      "eval_recall": 63.25301204819277,
      "eval_runtime": 8.7495,
      "eval_samples_per_second": 134.751,
      "step": 650
    },
    {
      "epoch": 3.7,
      "learning_rate": 2.0987654320987655e-05,
      "loss": 0.2126,
      "step": 700
    },
    {
      "epoch": 3.7,
      "eval_accuracy_score": 87.35103771479581,
      "eval_f1": 38.094203066236034,
      "eval_loss": 0.37450292706489563,
      "eval_precision": 52.6071842410197,
      "eval_recall": 68.37349397590361,
      "eval_runtime": 8.8823,
      "eval_samples_per_second": 132.736,
      "step": 700
    },
    {
      "epoch": 3.97,
      "learning_rate": 2.0105820105820105e-05,
      "loss": 0.211,
      "step": 750
    },
    {
      "epoch": 3.97,
      "eval_accuracy_score": 88.2883284981031,
      "eval_f1": 38.51324803050831,
      "eval_loss": 0.3612821102142334,
      "eval_precision": 61.22159090909091,
      "eval_recall": 64.90963855421687,
      "eval_runtime": 9.0113,
      "eval_samples_per_second": 130.836,
      "step": 750
    },
    {
      "epoch": 4.23,
      "learning_rate": 1.922398589065256e-05,
      "loss": 0.1651,
      "step": 800
    },
    {
      "epoch": 4.23,
      "eval_accuracy_score": 88.56505244365097,
      "eval_f1": 38.932898373925724,
      "eval_loss": 0.44898930191993713,
      "eval_precision": 57.75749674054759,
      "eval_recall": 66.71686746987952,
      "eval_runtime": 10.3767,
      "eval_samples_per_second": 113.62,
      "step": 800
    },
    {
      "epoch": 4.5,
      "learning_rate": 1.834215167548501e-05,
      "loss": 0.1649,
      "step": 850
    },
    {
      "epoch": 4.5,
      "eval_accuracy_score": 88.66324481142603,
      "eval_f1": 39.08416734249581,
      "eval_loss": 0.45583415031433105,
      "eval_precision": 58.894878706199464,
      "eval_recall": 65.8132530120482,
      "eval_runtime": 10.0575,
      "eval_samples_per_second": 117.226,
      "step": 850
    },
    {
      "epoch": 4.76,
      "learning_rate": 1.746031746031746e-05,
      "loss": 0.1788,
      "step": 900
    },
    {
      "epoch": 4.76,
      "eval_accuracy_score": 88.9622852041955,
      "eval_f1": 40.02918919780824,
      "eval_loss": 0.4240648150444031,
      "eval_precision": 60.96866096866097,
      "eval_recall": 64.45783132530121,
      "eval_runtime": 10.0412,
      "eval_samples_per_second": 117.417,
      "step": 900
    },
    {
      "epoch": 5.03,
      "learning_rate": 1.6578483245149913e-05,
      "loss": 0.1676,
      "step": 950
    },
    {
      "epoch": 5.03,
      "eval_accuracy_score": 87.55634902923455,
      "eval_f1": 43.86838413539317,
      "eval_loss": 0.4299115240573883,
      "eval_precision": 57.91505791505791,
      "eval_recall": 67.7710843373494,
      "eval_runtime": 10.0147,
      "eval_samples_per_second": 117.726,
      "step": 950
    },
    {
      "epoch": 5.29,
      "learning_rate": 1.5696649029982366e-05,
      "loss": 0.1179,
      "step": 1000
    },
    {
      "epoch": 5.29,
      "eval_accuracy_score": 87.20821245257756,
      "eval_f1": 39.553135574552805,
      "eval_loss": 0.49208664894104004,
      "eval_precision": 56.746532156368225,
      "eval_recall": 67.7710843373494,
      "eval_runtime": 10.0407,
      "eval_samples_per_second": 117.423,
      "step": 1000
    },
    {
      "epoch": 5.56,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.1383,
      "step": 1050
    },
    {
      "epoch": 5.56,
      "eval_accuracy_score": 87.743807185896,
      "eval_f1": 38.8202089054134,
      "eval_loss": 0.4442407786846161,
      "eval_precision": 56.649616368286445,
      "eval_recall": 66.71686746987952,
      "eval_runtime": 9.9651,
      "eval_samples_per_second": 118.313,
      "step": 1050
    },
    {
      "epoch": 5.82,
      "learning_rate": 1.3932980599647267e-05,
      "loss": 0.1381,
      "step": 1100
    },
    {
      "epoch": 5.82,
      "eval_accuracy_score": 86.94487837536265,
      "eval_f1": 38.34834659982452,
      "eval_loss": 0.4821961224079132,
      "eval_precision": 59.24932975871313,
      "eval_recall": 66.56626506024097,
      "eval_runtime": 9.9971,
      "eval_samples_per_second": 117.934,
      "step": 1100
    },
    {
      "epoch": 6.08,
      "learning_rate": 1.3051146384479719e-05,
      "loss": 0.1308,
      "step": 1150
    },
    {
      "epoch": 6.08,
      "eval_accuracy_score": 87.83307297478241,
      "eval_f1": 43.06179088429465,
      "eval_loss": 0.5310231447219849,
      "eval_precision": 59.2896174863388,
      "eval_recall": 65.36144578313254,
      "eval_runtime": 10.0333,
      "eval_samples_per_second": 117.508,
      "step": 1150
    },
    {
      "epoch": 6.35,
      "learning_rate": 1.2169312169312169e-05,
      "loss": 0.1008,
      "step": 1200
    },
    {
      "epoch": 6.35,
      "eval_accuracy_score": 87.8062932381165,
      "eval_f1": 38.25726977101606,
      "eval_loss": 0.5424754023551941,
      "eval_precision": 55.38277511961722,
      "eval_recall": 69.7289156626506,
      "eval_runtime": 9.9751,
      "eval_samples_per_second": 118.194,
      "step": 1200
    },
    {
      "epoch": 6.61,
      "learning_rate": 1.1287477954144621e-05,
      "loss": 0.1133,
      "step": 1250
    },
    {
      "epoch": 6.61,
      "eval_accuracy_score": 88.02945771033251,
      "eval_f1": 40.50705288524587,
      "eval_loss": 0.505093514919281,
      "eval_precision": 57.824933687002655,
      "eval_recall": 65.66265060240963,
      "eval_runtime": 10.0854,
      "eval_samples_per_second": 116.902,
      "step": 1250
    },
    {
      "epoch": 6.88,
      "learning_rate": 1.0405643738977073e-05,
      "loss": 0.1167,
      "step": 1300
    },
    {
      "epoch": 6.88,
      "eval_accuracy_score": 87.98928810533363,
      "eval_f1": 37.904391097050066,
      "eval_loss": 0.4733125865459442,
      "eval_precision": 57.469717362045756,
      "eval_recall": 64.30722891566265,
      "eval_runtime": 8.7518,
      "eval_samples_per_second": 134.716,
      "step": 1300
    },
    {
      "epoch": 7.14,
      "learning_rate": 9.523809523809523e-06,
      "loss": 0.0987,
      "step": 1350
    },
    {
      "epoch": 7.14,
      "eval_accuracy_score": 87.83753626422673,
      "eval_f1": 37.322003651376725,
      "eval_loss": 0.6008809208869934,
      "eval_precision": 55.66502463054187,
      "eval_recall": 68.07228915662651,
      "eval_runtime": 8.967,
      "eval_samples_per_second": 131.482,
      "step": 1350
    },
    {
      "epoch": 7.41,
      "learning_rate": 8.641975308641977e-06,
      "loss": 0.0929,
      "step": 1400
    },
    {
      "epoch": 7.41,
      "eval_accuracy_score": 88.2883284981031,
      "eval_f1": 38.96437550077822,
      "eval_loss": 0.5702655911445618,
      "eval_precision": 58.97771952817824,
      "eval_recall": 67.7710843373494,
      "eval_runtime": 10.9203,
      "eval_samples_per_second": 107.964,
      "step": 1400
    },
    {
      "epoch": 7.67,
      "learning_rate": 7.760141093474427e-06,
      "loss": 0.0939,
      "step": 1450
    },
    {
      "epoch": 7.67,
      "eval_accuracy_score": 87.980361526445,
      "eval_f1": 36.57320046638653,
      "eval_loss": 0.5611082315444946,
      "eval_precision": 56.08194622279129,
      "eval_recall": 65.96385542168674,
      "eval_runtime": 11.1802,
      "eval_samples_per_second": 105.454,
      "step": 1450
    },
    {
      "epoch": 7.94,
      "learning_rate": 6.878306878306878e-06,
      "loss": 0.0976,
      "step": 1500
    },
    {
      "epoch": 7.94,
      "eval_accuracy_score": 88.6364650747601,
      "eval_f1": 37.89298254130058,
      "eval_loss": 0.6237432360649109,
      "eval_precision": 59.53360768175583,
      "eval_recall": 65.36144578313254,
      "eval_runtime": 10.4805,
      "eval_samples_per_second": 112.495,
      "step": 1500
    },
    {
      "epoch": 8.2,
      "learning_rate": 5.99647266313933e-06,
      "loss": 0.0806,
      "step": 1550
    },
    {
      "epoch": 8.2,
      "eval_accuracy_score": 86.5654987725954,
      "eval_f1": 36.826891066079725,
      "eval_loss": 0.6972205638885498,
      "eval_precision": 57.01643489254109,
      "eval_recall": 67.92168674698796,
      "eval_runtime": 10.9535,
      "eval_samples_per_second": 107.636,
      "step": 1550
    },
    {
      "epoch": 8.47,
      "learning_rate": 5.114638447971782e-06,
      "loss": 0.0784,
      "step": 1600
    },
    {
      "epoch": 8.47,
      "eval_accuracy_score": 87.85092613255969,
      "eval_f1": 37.500099347680326,
      "eval_loss": 0.6484220623970032,
      "eval_precision": 58.68983957219251,
      "eval_recall": 66.1144578313253,
      "eval_runtime": 8.8955,
      "eval_samples_per_second": 132.539,
      "step": 1600
    },
    {
      "epoch": 8.73,
      "learning_rate": 4.232804232804233e-06,
      "loss": 0.0821,
      "step": 1650
    },
    {
      "epoch": 8.73,
      "eval_accuracy_score": 87.84646284311538,
      "eval_f1": 41.22001286807445,
      "eval_loss": 0.6608390808105469,
      "eval_precision": 58.84718498659517,
      "eval_recall": 66.1144578313253,
      "eval_runtime": 8.8611,
      "eval_samples_per_second": 133.053,
      "step": 1650
    },
    {
      "epoch": 8.99,
      "learning_rate": 3.3509700176366843e-06,
      "loss": 0.0834,
      "step": 1700
    },
    {
      "epoch": 8.99,
      "eval_accuracy_score": 87.6768578442312,
      "eval_f1": 40.52561281256491,
      "eval_loss": 0.6585069298744202,
      "eval_precision": 56.80628272251309,
      "eval_recall": 65.36144578313254,
      "eval_runtime": 8.9947,
      "eval_samples_per_second": 131.078,
      "step": 1700
    },
    {
      "epoch": 9.26,
      "learning_rate": 2.469135802469136e-06,
      "loss": 0.0687,
      "step": 1750
    },
    {
      "epoch": 9.26,
      "eval_accuracy_score": 87.67239455478688,
      "eval_f1": 41.42806666569158,
      "eval_loss": 0.7016396522521973,
      "eval_precision": 57.929226736566186,
      "eval_recall": 66.56626506024097,
      "eval_runtime": 8.6369,
      "eval_samples_per_second": 136.508,
      "step": 1750
    },
    {
      "epoch": 9.52,
      "learning_rate": 1.5873015873015873e-06,
      "loss": 0.0789,
      "step": 1800
    },
    {
      "epoch": 9.52,
      "eval_accuracy_score": 88.091943762553,
      "eval_f1": 40.693681724146366,
      "eval_loss": 0.6877384781837463,
      "eval_precision": 57.810413885180246,
      "eval_recall": 65.21084337349397,
      "eval_runtime": 8.9265,
      "eval_samples_per_second": 132.079,
      "step": 1800
    },
    {
      "epoch": 9.79,
      "learning_rate": 7.054673721340388e-07,
      "loss": 0.0634,
      "step": 1850
    },
    {
      "epoch": 9.79,
      "eval_accuracy_score": 87.85985271144834,
      "eval_f1": 40.37377917369942,
      "eval_loss": 0.7008724212646484,
      "eval_precision": 58.56573705179283,
      "eval_recall": 66.41566265060241,
      "eval_runtime": 8.753,
      "eval_samples_per_second": 134.697,
      "step": 1850
    },
    {
      "epoch": 10.0,
      "step": 1890,
      "total_flos": 6.580960338659328e+16,
      "train_runtime": 3394.8711,
      "train_samples_per_second": 0.557
    }
  ],
  "max_steps": 1890,
  "num_train_epochs": 10,
  "total_flos": 6.580960338659328e+16,
  "trial_name": null,
  "trial_params": null
}