{ "best_metric": 0.04516952857375145, "best_model_checkpoint": "./models/results_short_jokes_47/checkpoint-3400", "epoch": 0.6262086748319367, "global_step": 3400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 0.691, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.6000000000000001e-06, "loss": 0.6954, "step": 16 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-06, "loss": 0.6964, "step": 24 }, { "epoch": 0.01, "learning_rate": 3.2000000000000003e-06, "loss": 0.6855, "step": 32 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "loss": 0.6916, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.800000000000001e-06, "loss": 0.6845, "step": 48 }, { "epoch": 0.01, "learning_rate": 5.600000000000001e-06, "loss": 0.6708, "step": 56 }, { "epoch": 0.01, "learning_rate": 6.4000000000000006e-06, "loss": 0.6411, "step": 64 }, { "epoch": 0.01, "learning_rate": 7.2e-06, "loss": 0.5579, "step": 72 }, { "epoch": 0.01, "learning_rate": 7.9e-06, "loss": 0.3916, "step": 80 }, { "epoch": 0.02, "learning_rate": 8.7e-06, "loss": 0.262, "step": 88 }, { "epoch": 0.02, "learning_rate": 9.5e-06, "loss": 0.2353, "step": 96 }, { "epoch": 0.02, "eval_accuracy": 0.9376661947024899, "eval_f1": 0.9405800441123219, "eval_loss": 0.1874532550573349, "eval_precision": 0.9014102280972963, "eval_recall": 0.9833086691674984, "eval_runtime": 312.5596, "eval_samples_per_second": 185.289, "eval_steps_per_second": 23.164, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.03e-05, "loss": 0.193, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.11e-05, "loss": 0.1671, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.19e-05, "loss": 0.1242, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.2600000000000001e-05, "loss": 0.1497, "step": 128 }, { "epoch": 0.03, "learning_rate": 1.3400000000000002e-05, "loss": 0.1173, "step": 136 }, { "epoch": 0.03, "learning_rate": 1.42e-05, "loss": 0.1598, "step": 144 }, { "epoch": 0.03, "learning_rate": 1.5e-05, "loss": 0.1312, "step": 152 }, { "epoch": 0.03, "learning_rate": 1.58e-05, "loss": 0.1374, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.66e-05, "loss": 0.1055, "step": 168 }, { "epoch": 0.03, "learning_rate": 1.74e-05, "loss": 0.1114, "step": 176 }, { "epoch": 0.03, "learning_rate": 1.8200000000000002e-05, "loss": 0.1404, "step": 184 }, { "epoch": 0.04, "learning_rate": 1.9e-05, "loss": 0.1289, "step": 192 }, { "epoch": 0.04, "learning_rate": 1.9800000000000004e-05, "loss": 0.136, "step": 200 }, { "epoch": 0.04, "eval_accuracy": 0.962185309251649, "eval_f1": 0.9613947257086447, "eval_loss": 0.11203108727931976, "eval_precision": 0.9854721549636803, "eval_recall": 0.9384657741680146, "eval_runtime": 315.0228, "eval_samples_per_second": 183.841, "eval_steps_per_second": 22.982, "step": 200 }, { "epoch": 0.04, "learning_rate": 2.06e-05, "loss": 0.1112, "step": 208 }, { "epoch": 0.04, "learning_rate": 2.1400000000000002e-05, "loss": 0.1447, "step": 216 }, { "epoch": 0.04, "learning_rate": 2.22e-05, "loss": 0.143, "step": 224 }, { "epoch": 0.04, "learning_rate": 2.3000000000000003e-05, "loss": 0.1175, "step": 232 }, { "epoch": 0.04, "learning_rate": 2.38e-05, "loss": 0.1208, "step": 240 }, { "epoch": 0.05, "learning_rate": 2.46e-05, "loss": 0.1563, "step": 248 }, { "epoch": 0.05, "learning_rate": 2.54e-05, "loss": 0.0701, "step": 256 }, { "epoch": 0.05, "learning_rate": 2.6200000000000003e-05, "loss": 0.0881, "step": 264 }, { "epoch": 0.05, "learning_rate": 2.7000000000000002e-05, "loss": 0.1091, "step": 272 }, { "epoch": 0.05, "learning_rate": 2.7800000000000005e-05, "loss": 0.1512, "step": 280 }, { "epoch": 0.05, "learning_rate": 2.86e-05, "loss": 0.0802, "step": 288 }, { "epoch": 0.05, "learning_rate": 2.94e-05, "loss": 0.103, "step": 296 }, { "epoch": 0.06, "eval_accuracy": 0.9501674897261456, "eval_f1": 0.9481028591979861, "eval_loss": 0.13948623836040497, "eval_precision": 0.992806839151885, "eval_recall": 0.9072512647554806, "eval_runtime": 304.8107, "eval_samples_per_second": 190.0, "eval_steps_per_second": 23.752, "step": 300 }, { "epoch": 0.06, "learning_rate": 3.02e-05, "loss": 0.0864, "step": 304 }, { "epoch": 0.06, "learning_rate": 3.1e-05, "loss": 0.0991, "step": 312 }, { "epoch": 0.06, "learning_rate": 3.18e-05, "loss": 0.0577, "step": 320 }, { "epoch": 0.06, "learning_rate": 3.26e-05, "loss": 0.1446, "step": 328 }, { "epoch": 0.06, "learning_rate": 3.3400000000000005e-05, "loss": 0.1009, "step": 336 }, { "epoch": 0.06, "learning_rate": 3.4200000000000005e-05, "loss": 0.1059, "step": 344 }, { "epoch": 0.06, "learning_rate": 3.5e-05, "loss": 0.1156, "step": 352 }, { "epoch": 0.07, "learning_rate": 3.58e-05, "loss": 0.0714, "step": 360 }, { "epoch": 0.07, "learning_rate": 3.66e-05, "loss": 0.0862, "step": 368 }, { "epoch": 0.07, "learning_rate": 3.74e-05, "loss": 0.1634, "step": 376 }, { "epoch": 0.07, "learning_rate": 3.82e-05, "loss": 0.0953, "step": 384 }, { "epoch": 0.07, "learning_rate": 3.9000000000000006e-05, "loss": 0.0822, "step": 392 }, { "epoch": 0.07, "learning_rate": 3.9800000000000005e-05, "loss": 0.0768, "step": 400 }, { "epoch": 0.07, "eval_accuracy": 0.973028973995925, "eval_f1": 0.9733447098976109, "eval_loss": 0.08835102617740631, "eval_precision": 0.9653386589039705, "eval_recall": 0.9814846680662147, "eval_runtime": 320.6331, "eval_samples_per_second": 180.624, "eval_steps_per_second": 22.58, "step": 400 }, { "epoch": 0.08, "learning_rate": 4.0600000000000004e-05, "loss": 0.1154, "step": 408 }, { "epoch": 0.08, "learning_rate": 4.14e-05, "loss": 0.1243, "step": 416 }, { "epoch": 0.08, "learning_rate": 4.22e-05, "loss": 0.0727, "step": 424 }, { "epoch": 0.08, "learning_rate": 4.3e-05, "loss": 0.0955, "step": 432 }, { "epoch": 0.08, "learning_rate": 4.38e-05, "loss": 0.0822, "step": 440 }, { "epoch": 0.08, "learning_rate": 4.46e-05, "loss": 0.1043, "step": 448 }, { "epoch": 0.08, "learning_rate": 4.5400000000000006e-05, "loss": 0.1473, "step": 456 }, { "epoch": 0.09, "learning_rate": 4.6200000000000005e-05, "loss": 0.0848, "step": 464 }, { "epoch": 0.09, "learning_rate": 4.7e-05, "loss": 0.139, "step": 472 }, { "epoch": 0.09, "learning_rate": 4.78e-05, "loss": 0.103, "step": 480 }, { "epoch": 0.09, "learning_rate": 4.86e-05, "loss": 0.1075, "step": 488 }, { "epoch": 0.09, "learning_rate": 4.94e-05, "loss": 0.1285, "step": 496 }, { "epoch": 0.09, "eval_accuracy": 0.9650861622405636, "eval_f1": 0.9643335920412051, "eval_loss": 0.10335744917392731, "eval_precision": 0.9891442011941378, "eval_recall": 0.9407371717658396, "eval_runtime": 317.7591, "eval_samples_per_second": 182.258, "eval_steps_per_second": 22.785, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.997971190910936e-05, "loss": 0.085, "step": 504 }, { "epoch": 0.09, "learning_rate": 4.989855954554677e-05, "loss": 0.1381, "step": 512 }, { "epoch": 0.1, "learning_rate": 4.9817407181984176e-05, "loss": 0.1202, "step": 520 }, { "epoch": 0.1, "learning_rate": 4.973625481842159e-05, "loss": 0.0778, "step": 528 }, { "epoch": 0.1, "learning_rate": 4.9655102454859e-05, "loss": 0.0872, "step": 536 }, { "epoch": 0.1, "learning_rate": 4.957395009129641e-05, "loss": 0.1364, "step": 544 }, { "epoch": 0.1, "learning_rate": 4.9492797727733825e-05, "loss": 0.1305, "step": 552 }, { "epoch": 0.1, "learning_rate": 4.9411645364171234e-05, "loss": 0.0677, "step": 560 }, { "epoch": 0.1, "learning_rate": 4.933049300060865e-05, "loss": 0.1187, "step": 568 }, { "epoch": 0.11, "learning_rate": 4.924934063704605e-05, "loss": 0.1605, "step": 576 }, { "epoch": 0.11, "learning_rate": 4.916818827348347e-05, "loss": 0.1732, "step": 584 }, { "epoch": 0.11, "learning_rate": 4.908703590992088e-05, "loss": 0.104, "step": 592 }, { "epoch": 0.11, "learning_rate": 4.900588354635829e-05, "loss": 0.0872, "step": 600 }, { "epoch": 0.11, "eval_accuracy": 0.973305245709155, "eval_f1": 0.9731382701462973, "eval_loss": 0.081203892827034, "eval_precision": 0.982699933326315, "eval_recall": 0.9637608837801562, "eval_runtime": 323.1202, "eval_samples_per_second": 179.234, "eval_steps_per_second": 22.407, "step": 600 }, { "epoch": 0.11, "learning_rate": 4.89247311827957e-05, "loss": 0.0921, "step": 608 }, { "epoch": 0.11, "learning_rate": 4.884357881923311e-05, "loss": 0.069, "step": 616 }, { "epoch": 0.11, "learning_rate": 4.8762426455670526e-05, "loss": 0.0517, "step": 624 }, { "epoch": 0.12, "learning_rate": 4.8681274092107935e-05, "loss": 0.1178, "step": 632 }, { "epoch": 0.12, "learning_rate": 4.8600121728545343e-05, "loss": 0.154, "step": 640 }, { "epoch": 0.12, "learning_rate": 4.851896936498276e-05, "loss": 0.0999, "step": 648 }, { "epoch": 0.12, "learning_rate": 4.843781700142017e-05, "loss": 0.0949, "step": 656 }, { "epoch": 0.12, "learning_rate": 4.835666463785758e-05, "loss": 0.0821, "step": 664 }, { "epoch": 0.12, "learning_rate": 4.827551227429499e-05, "loss": 0.0947, "step": 672 }, { "epoch": 0.13, "learning_rate": 4.81943599107324e-05, "loss": 0.1451, "step": 680 }, { "epoch": 0.13, "learning_rate": 4.811320754716982e-05, "loss": 0.084, "step": 688 }, { "epoch": 0.13, "learning_rate": 4.8032055183607226e-05, "loss": 0.0659, "step": 696 }, { "epoch": 0.13, "eval_accuracy": 0.9683668888351694, "eval_f1": 0.9678054266835373, "eval_loss": 0.13082686066627502, "eval_precision": 0.9888318310769563, "eval_recall": 0.9476546099046701, "eval_runtime": 319.8968, "eval_samples_per_second": 181.04, "eval_steps_per_second": 22.632, "step": 700 }, { "epoch": 0.13, "learning_rate": 4.7950902820044635e-05, "loss": 0.1008, "step": 704 }, { "epoch": 0.13, "learning_rate": 4.786975045648205e-05, "loss": 0.1163, "step": 712 }, { "epoch": 0.13, "learning_rate": 4.778859809291946e-05, "loss": 0.0985, "step": 720 }, { "epoch": 0.13, "learning_rate": 4.770744572935687e-05, "loss": 0.1044, "step": 728 }, { "epoch": 0.14, "learning_rate": 4.7626293365794284e-05, "loss": 0.0954, "step": 736 }, { "epoch": 0.14, "learning_rate": 4.754514100223169e-05, "loss": 0.084, "step": 744 }, { "epoch": 0.14, "learning_rate": 4.74639886386691e-05, "loss": 0.1018, "step": 752 }, { "epoch": 0.14, "learning_rate": 4.738283627510651e-05, "loss": 0.0698, "step": 760 }, { "epoch": 0.14, "learning_rate": 4.730168391154393e-05, "loss": 0.1247, "step": 768 }, { "epoch": 0.14, "learning_rate": 4.722053154798134e-05, "loss": 0.0551, "step": 776 }, { "epoch": 0.14, "learning_rate": 4.7139379184418745e-05, "loss": 0.0788, "step": 784 }, { "epoch": 0.15, "learning_rate": 4.705822682085616e-05, "loss": 0.1147, "step": 792 }, { "epoch": 0.15, "learning_rate": 4.697707445729357e-05, "loss": 0.0704, "step": 800 }, { "epoch": 0.15, "eval_accuracy": 0.9718720861967746, "eval_f1": 0.9722992160797185, "eval_loss": 0.08833611011505127, "eval_precision": 0.9609747899159664, "eval_recall": 0.9838937261245139, "eval_runtime": 320.3064, "eval_samples_per_second": 180.808, "eval_steps_per_second": 22.603, "step": 800 }, { "epoch": 0.15, "learning_rate": 4.6895922093730985e-05, "loss": 0.1034, "step": 808 }, { "epoch": 0.15, "learning_rate": 4.6814769730168394e-05, "loss": 0.1137, "step": 816 }, { "epoch": 0.15, "learning_rate": 4.67336173666058e-05, "loss": 0.0647, "step": 824 }, { "epoch": 0.15, "learning_rate": 4.665246500304322e-05, "loss": 0.1009, "step": 832 }, { "epoch": 0.15, "learning_rate": 4.657131263948063e-05, "loss": 0.0659, "step": 840 }, { "epoch": 0.16, "learning_rate": 4.6490160275918036e-05, "loss": 0.0619, "step": 848 }, { "epoch": 0.16, "learning_rate": 4.640900791235545e-05, "loss": 0.0762, "step": 856 }, { "epoch": 0.16, "learning_rate": 4.632785554879286e-05, "loss": 0.0348, "step": 864 }, { "epoch": 0.16, "learning_rate": 4.624670318523027e-05, "loss": 0.0976, "step": 872 }, { "epoch": 0.16, "learning_rate": 4.6165550821667686e-05, "loss": 0.0727, "step": 880 }, { "epoch": 0.16, "learning_rate": 4.6084398458105094e-05, "loss": 0.0572, "step": 888 }, { "epoch": 0.17, "learning_rate": 4.600324609454251e-05, "loss": 0.0582, "step": 896 }, { "epoch": 0.17, "eval_accuracy": 0.973028973995925, "eval_f1": 0.973034561336878, "eval_loss": 0.08047471195459366, "eval_precision": 0.9762028473449028, "eval_recall": 0.9698867742712599, "eval_runtime": 320.2553, "eval_samples_per_second": 180.837, "eval_steps_per_second": 22.607, "step": 900 }, { "epoch": 0.17, "learning_rate": 4.592209373097991e-05, "loss": 0.098, "step": 904 }, { "epoch": 0.17, "learning_rate": 4.584094136741733e-05, "loss": 0.1056, "step": 912 }, { "epoch": 0.17, "learning_rate": 4.5759789003854744e-05, "loss": 0.0511, "step": 920 }, { "epoch": 0.17, "learning_rate": 4.567863664029215e-05, "loss": 0.092, "step": 928 }, { "epoch": 0.17, "learning_rate": 4.559748427672956e-05, "loss": 0.0727, "step": 936 }, { "epoch": 0.17, "learning_rate": 4.551633191316697e-05, "loss": 0.0495, "step": 944 }, { "epoch": 0.18, "learning_rate": 4.5435179549604386e-05, "loss": 0.0912, "step": 952 }, { "epoch": 0.18, "learning_rate": 4.5354027186041795e-05, "loss": 0.0717, "step": 960 }, { "epoch": 0.18, "learning_rate": 4.5272874822479204e-05, "loss": 0.0395, "step": 968 }, { "epoch": 0.18, "learning_rate": 4.519172245891662e-05, "loss": 0.1236, "step": 976 }, { "epoch": 0.18, "learning_rate": 4.511057009535403e-05, "loss": 0.1675, "step": 984 }, { "epoch": 0.18, "learning_rate": 4.502941773179144e-05, "loss": 0.0863, "step": 992 }, { "epoch": 0.18, "learning_rate": 4.494826536822885e-05, "loss": 0.0867, "step": 1000 }, { "epoch": 0.18, "eval_accuracy": 0.9773457195151432, "eval_f1": 0.977261698440208, "eval_loss": 0.06294739246368408, "eval_precision": 0.984324267709388, "eval_recall": 0.9702997556526827, "eval_runtime": 318.1602, "eval_samples_per_second": 182.028, "eval_steps_per_second": 22.756, "step": 1000 }, { "epoch": 0.19, "learning_rate": 4.486711300466626e-05, "loss": 0.1165, "step": 1008 }, { "epoch": 0.19, "learning_rate": 4.478596064110368e-05, "loss": 0.083, "step": 1016 }, { "epoch": 0.19, "learning_rate": 4.470480827754109e-05, "loss": 0.083, "step": 1024 }, { "epoch": 0.19, "learning_rate": 4.4623655913978496e-05, "loss": 0.0775, "step": 1032 }, { "epoch": 0.19, "learning_rate": 4.454250355041591e-05, "loss": 0.0958, "step": 1040 }, { "epoch": 0.19, "learning_rate": 4.446135118685332e-05, "loss": 0.0814, "step": 1048 }, { "epoch": 0.19, "learning_rate": 4.438019882329073e-05, "loss": 0.0633, "step": 1056 }, { "epoch": 0.2, "learning_rate": 4.4299046459728145e-05, "loss": 0.0876, "step": 1064 }, { "epoch": 0.2, "learning_rate": 4.4217894096165554e-05, "loss": 0.0853, "step": 1072 }, { "epoch": 0.2, "learning_rate": 4.413674173260296e-05, "loss": 0.0678, "step": 1080 }, { "epoch": 0.2, "learning_rate": 4.405558936904037e-05, "loss": 0.0541, "step": 1088 }, { "epoch": 0.2, "learning_rate": 4.397443700547779e-05, "loss": 0.1059, "step": 1096 }, { "epoch": 0.2, "eval_accuracy": 0.9705252615947785, "eval_f1": 0.9711601817905354, "eval_loss": 0.08772066235542297, "eval_precision": 0.9538364529403955, "eval_recall": 0.9891248236225351, "eval_runtime": 320.9793, "eval_samples_per_second": 180.429, "eval_steps_per_second": 22.556, "step": 1100 }, { "epoch": 0.2, "learning_rate": 4.38932846419152e-05, "loss": 0.0416, "step": 1104 }, { "epoch": 0.2, "learning_rate": 4.3812132278352605e-05, "loss": 0.108, "step": 1112 }, { "epoch": 0.21, "learning_rate": 4.373097991479002e-05, "loss": 0.0706, "step": 1120 }, { "epoch": 0.21, "learning_rate": 4.364982755122743e-05, "loss": 0.1139, "step": 1128 }, { "epoch": 0.21, "learning_rate": 4.3568675187664845e-05, "loss": 0.0468, "step": 1136 }, { "epoch": 0.21, "learning_rate": 4.3487522824102254e-05, "loss": 0.113, "step": 1144 }, { "epoch": 0.21, "learning_rate": 4.340637046053966e-05, "loss": 0.0934, "step": 1152 }, { "epoch": 0.21, "learning_rate": 4.332521809697708e-05, "loss": 0.1363, "step": 1160 }, { "epoch": 0.22, "learning_rate": 4.324406573341449e-05, "loss": 0.0816, "step": 1168 }, { "epoch": 0.22, "learning_rate": 4.31629133698519e-05, "loss": 0.0846, "step": 1176 }, { "epoch": 0.22, "learning_rate": 4.308176100628931e-05, "loss": 0.0652, "step": 1184 }, { "epoch": 0.22, "learning_rate": 4.300060864272672e-05, "loss": 0.1003, "step": 1192 }, { "epoch": 0.22, "learning_rate": 4.291945627916413e-05, "loss": 0.1021, "step": 1200 }, { "epoch": 0.22, "eval_accuracy": 0.9757571571640709, "eval_f1": 0.9760752504941722, "eval_loss": 0.0945325568318367, "eval_precision": 0.966685793364161, "eval_recall": 0.9856488969955605, "eval_runtime": 324.5414, "eval_samples_per_second": 178.449, "eval_steps_per_second": 22.308, "step": 1200 }, { "epoch": 0.22, "learning_rate": 4.2838303915601546e-05, "loss": 0.0968, "step": 1208 }, { "epoch": 0.22, "learning_rate": 4.2757151552038955e-05, "loss": 0.076, "step": 1216 }, { "epoch": 0.23, "learning_rate": 4.267599918847637e-05, "loss": 0.0614, "step": 1224 }, { "epoch": 0.23, "learning_rate": 4.259484682491377e-05, "loss": 0.0889, "step": 1232 }, { "epoch": 0.23, "learning_rate": 4.251369446135119e-05, "loss": 0.0951, "step": 1240 }, { "epoch": 0.23, "learning_rate": 4.2432542097788604e-05, "loss": 0.0406, "step": 1248 }, { "epoch": 0.23, "learning_rate": 4.235138973422601e-05, "loss": 0.0786, "step": 1256 }, { "epoch": 0.23, "learning_rate": 4.227023737066342e-05, "loss": 0.104, "step": 1264 }, { "epoch": 0.23, "learning_rate": 4.218908500710083e-05, "loss": 0.1195, "step": 1272 }, { "epoch": 0.24, "learning_rate": 4.210793264353825e-05, "loss": 0.0644, "step": 1280 }, { "epoch": 0.24, "learning_rate": 4.2026780279975656e-05, "loss": 0.0485, "step": 1288 }, { "epoch": 0.24, "learning_rate": 4.1945627916413064e-05, "loss": 0.0737, "step": 1296 }, { "epoch": 0.24, "eval_accuracy": 0.9794522913285216, "eval_f1": 0.9795680093403386, "eval_loss": 0.062313616275787354, "eval_precision": 0.9774199074867226, "eval_recall": 0.9817255738720446, "eval_runtime": 318.2777, "eval_samples_per_second": 181.961, "eval_steps_per_second": 22.747, "step": 1300 }, { "epoch": 0.24, "learning_rate": 4.186447555285048e-05, "loss": 0.1107, "step": 1304 }, { "epoch": 0.24, "learning_rate": 4.178332318928789e-05, "loss": 0.0381, "step": 1312 }, { "epoch": 0.24, "learning_rate": 4.17021708257253e-05, "loss": 0.0901, "step": 1320 }, { "epoch": 0.24, "learning_rate": 4.1621018462162714e-05, "loss": 0.1092, "step": 1328 }, { "epoch": 0.25, "learning_rate": 4.153986609860012e-05, "loss": 0.0992, "step": 1336 }, { "epoch": 0.25, "learning_rate": 4.145871373503754e-05, "loss": 0.0763, "step": 1344 }, { "epoch": 0.25, "learning_rate": 4.137756137147495e-05, "loss": 0.0871, "step": 1352 }, { "epoch": 0.25, "learning_rate": 4.1296409007912356e-05, "loss": 0.0894, "step": 1360 }, { "epoch": 0.25, "learning_rate": 4.121525664434977e-05, "loss": 0.0837, "step": 1368 }, { "epoch": 0.25, "learning_rate": 4.113410428078718e-05, "loss": 0.0786, "step": 1376 }, { "epoch": 0.25, "learning_rate": 4.105295191722459e-05, "loss": 0.0617, "step": 1384 }, { "epoch": 0.26, "learning_rate": 4.0971799553662005e-05, "loss": 0.1143, "step": 1392 }, { "epoch": 0.26, "learning_rate": 4.0890647190099414e-05, "loss": 0.0505, "step": 1400 }, { "epoch": 0.26, "eval_accuracy": 0.980471043271057, "eval_f1": 0.9804956283304879, "eval_loss": 0.060013771057128906, "eval_precision": 0.9826477704804701, "eval_recall": 0.9783528925904257, "eval_runtime": 321.185, "eval_samples_per_second": 180.314, "eval_steps_per_second": 22.542, "step": 1400 }, { "epoch": 0.26, "learning_rate": 4.080949482653682e-05, "loss": 0.0471, "step": 1408 }, { "epoch": 0.26, "learning_rate": 4.072834246297423e-05, "loss": 0.0651, "step": 1416 }, { "epoch": 0.26, "learning_rate": 4.064719009941165e-05, "loss": 0.0796, "step": 1424 }, { "epoch": 0.26, "learning_rate": 4.0566037735849064e-05, "loss": 0.0486, "step": 1432 }, { "epoch": 0.27, "learning_rate": 4.0484885372286466e-05, "loss": 0.1081, "step": 1440 }, { "epoch": 0.27, "learning_rate": 4.040373300872388e-05, "loss": 0.0397, "step": 1448 }, { "epoch": 0.27, "learning_rate": 4.032258064516129e-05, "loss": 0.0586, "step": 1456 }, { "epoch": 0.27, "learning_rate": 4.0241428281598706e-05, "loss": 0.0511, "step": 1464 }, { "epoch": 0.27, "learning_rate": 4.0160275918036115e-05, "loss": 0.0273, "step": 1472 }, { "epoch": 0.27, "learning_rate": 4.0079123554473524e-05, "loss": 0.09, "step": 1480 }, { "epoch": 0.27, "learning_rate": 3.999797119091094e-05, "loss": 0.1026, "step": 1488 }, { "epoch": 0.28, "learning_rate": 3.991681882734835e-05, "loss": 0.1159, "step": 1496 }, { "epoch": 0.28, "eval_accuracy": 0.979538626238906, "eval_f1": 0.9796374258956955, "eval_loss": 0.059322111308574677, "eval_precision": 0.9782757910632164, "eval_recall": 0.9810028564545549, "eval_runtime": 320.1061, "eval_samples_per_second": 180.921, "eval_steps_per_second": 22.618, "step": 1500 }, { "epoch": 0.28, "learning_rate": 3.983566646378576e-05, "loss": 0.055, "step": 1504 }, { "epoch": 0.28, "learning_rate": 3.975451410022317e-05, "loss": 0.0556, "step": 1512 }, { "epoch": 0.28, "learning_rate": 3.967336173666058e-05, "loss": 0.0569, "step": 1520 }, { "epoch": 0.28, "learning_rate": 3.959220937309799e-05, "loss": 0.0622, "step": 1528 }, { "epoch": 0.28, "learning_rate": 3.9511057009535407e-05, "loss": 0.0546, "step": 1536 }, { "epoch": 0.28, "learning_rate": 3.9429904645972815e-05, "loss": 0.08, "step": 1544 }, { "epoch": 0.29, "learning_rate": 3.934875228241023e-05, "loss": 0.0657, "step": 1552 }, { "epoch": 0.29, "learning_rate": 3.926759991884763e-05, "loss": 0.0414, "step": 1560 }, { "epoch": 0.29, "learning_rate": 3.918644755528505e-05, "loss": 0.0747, "step": 1568 }, { "epoch": 0.29, "learning_rate": 3.9105295191722465e-05, "loss": 0.0617, "step": 1576 }, { "epoch": 0.29, "learning_rate": 3.9024142828159874e-05, "loss": 0.0318, "step": 1584 }, { "epoch": 0.29, "learning_rate": 3.894299046459728e-05, "loss": 0.1025, "step": 1592 }, { "epoch": 0.29, "learning_rate": 3.886183810103469e-05, "loss": 0.1075, "step": 1600 }, { "epoch": 0.29, "eval_accuracy": 0.9777946610491418, "eval_f1": 0.9776805859280085, "eval_loss": 0.07334825396537781, "eval_precision": 0.9861699520324919, "eval_recall": 0.969336132429363, "eval_runtime": 318.519, "eval_samples_per_second": 181.823, "eval_steps_per_second": 22.73, "step": 1600 }, { "epoch": 0.3, "learning_rate": 3.878068573747211e-05, "loss": 0.0735, "step": 1608 }, { "epoch": 0.3, "learning_rate": 3.8699533373909516e-05, "loss": 0.1103, "step": 1616 }, { "epoch": 0.3, "learning_rate": 3.8618381010346925e-05, "loss": 0.0638, "step": 1624 }, { "epoch": 0.3, "learning_rate": 3.853722864678434e-05, "loss": 0.0608, "step": 1632 }, { "epoch": 0.3, "learning_rate": 3.845607628322175e-05, "loss": 0.0609, "step": 1640 }, { "epoch": 0.3, "learning_rate": 3.837492391965916e-05, "loss": 0.0606, "step": 1648 }, { "epoch": 0.31, "learning_rate": 3.8293771556096574e-05, "loss": 0.0834, "step": 1656 }, { "epoch": 0.31, "learning_rate": 3.821261919253398e-05, "loss": 0.0889, "step": 1664 }, { "epoch": 0.31, "learning_rate": 3.81314668289714e-05, "loss": 0.0464, "step": 1672 }, { "epoch": 0.31, "learning_rate": 3.805031446540881e-05, "loss": 0.0473, "step": 1680 }, { "epoch": 0.31, "learning_rate": 3.796916210184622e-05, "loss": 0.0484, "step": 1688 }, { "epoch": 0.31, "learning_rate": 3.788800973828363e-05, "loss": 0.0938, "step": 1696 }, { "epoch": 0.31, "eval_accuracy": 0.9813343923749007, "eval_f1": 0.9813185863648146, "eval_loss": 0.06325095146894455, "eval_precision": 0.9855595667870036, "eval_recall": 0.9771139484461575, "eval_runtime": 318.5283, "eval_samples_per_second": 181.817, "eval_steps_per_second": 22.73, "step": 1700 }, { "epoch": 0.31, "learning_rate": 3.780685737472104e-05, "loss": 0.0528, "step": 1704 }, { "epoch": 0.32, "learning_rate": 3.772570501115845e-05, "loss": 0.0541, "step": 1712 }, { "epoch": 0.32, "learning_rate": 3.7644552647595866e-05, "loss": 0.0737, "step": 1720 }, { "epoch": 0.32, "learning_rate": 3.7563400284033275e-05, "loss": 0.0593, "step": 1728 }, { "epoch": 0.32, "learning_rate": 3.7482247920470684e-05, "loss": 0.0866, "step": 1736 }, { "epoch": 0.32, "learning_rate": 3.740109555690809e-05, "loss": 0.0564, "step": 1744 }, { "epoch": 0.32, "learning_rate": 3.731994319334551e-05, "loss": 0.0599, "step": 1752 }, { "epoch": 0.32, "learning_rate": 3.7238790829782924e-05, "loss": 0.0726, "step": 1760 }, { "epoch": 0.33, "learning_rate": 3.7157638466220326e-05, "loss": 0.0675, "step": 1768 }, { "epoch": 0.33, "learning_rate": 3.707648610265774e-05, "loss": 0.0783, "step": 1776 }, { "epoch": 0.33, "learning_rate": 3.699533373909515e-05, "loss": 0.065, "step": 1784 }, { "epoch": 0.33, "learning_rate": 3.6914181375532566e-05, "loss": 0.0599, "step": 1792 }, { "epoch": 0.33, "learning_rate": 3.6833029011969975e-05, "loss": 0.0708, "step": 1800 }, { "epoch": 0.33, "eval_accuracy": 0.9794695583105985, "eval_f1": 0.9794158890639337, "eval_loss": 0.06451611965894699, "eval_precision": 0.9854037483452937, "eval_recall": 0.9735003613587088, "eval_runtime": 315.4864, "eval_samples_per_second": 183.57, "eval_steps_per_second": 22.949, "step": 1800 }, { "epoch": 0.33, "learning_rate": 3.6751876648407384e-05, "loss": 0.0811, "step": 1808 }, { "epoch": 0.33, "learning_rate": 3.66707242848448e-05, "loss": 0.0723, "step": 1816 }, { "epoch": 0.34, "learning_rate": 3.658957192128221e-05, "loss": 0.0748, "step": 1824 }, { "epoch": 0.34, "learning_rate": 3.650841955771962e-05, "loss": 0.0869, "step": 1832 }, { "epoch": 0.34, "learning_rate": 3.6427267194157034e-05, "loss": 0.0578, "step": 1840 }, { "epoch": 0.34, "learning_rate": 3.634611483059444e-05, "loss": 0.0813, "step": 1848 }, { "epoch": 0.34, "learning_rate": 3.626496246703185e-05, "loss": 0.0748, "step": 1856 }, { "epoch": 0.34, "learning_rate": 3.618381010346927e-05, "loss": 0.0686, "step": 1864 }, { "epoch": 0.34, "learning_rate": 3.6102657739906676e-05, "loss": 0.0697, "step": 1872 }, { "epoch": 0.35, "learning_rate": 3.602150537634409e-05, "loss": 0.0728, "step": 1880 }, { "epoch": 0.35, "learning_rate": 3.5940353012781494e-05, "loss": 0.079, "step": 1888 }, { "epoch": 0.35, "learning_rate": 3.585920064921891e-05, "loss": 0.0719, "step": 1896 }, { "epoch": 0.35, "eval_accuracy": 0.9799012328625203, "eval_f1": 0.9800301948943178, "eval_loss": 0.0590737946331501, "eval_precision": 0.9771133385789059, "eval_recall": 0.9829645180163128, "eval_runtime": 317.0745, "eval_samples_per_second": 182.651, "eval_steps_per_second": 22.834, "step": 1900 }, { "epoch": 0.35, "learning_rate": 3.5778048285656325e-05, "loss": 0.0502, "step": 1904 }, { "epoch": 0.35, "learning_rate": 3.5696895922093734e-05, "loss": 0.0592, "step": 1912 }, { "epoch": 0.35, "learning_rate": 3.561574355853114e-05, "loss": 0.0849, "step": 1920 }, { "epoch": 0.36, "learning_rate": 3.553459119496855e-05, "loss": 0.1225, "step": 1928 }, { "epoch": 0.36, "learning_rate": 3.545343883140597e-05, "loss": 0.0855, "step": 1936 }, { "epoch": 0.36, "learning_rate": 3.5372286467843377e-05, "loss": 0.0652, "step": 1944 }, { "epoch": 0.36, "learning_rate": 3.5291134104280785e-05, "loss": 0.0585, "step": 1952 }, { "epoch": 0.36, "learning_rate": 3.52099817407182e-05, "loss": 0.0544, "step": 1960 }, { "epoch": 0.36, "learning_rate": 3.512882937715561e-05, "loss": 0.0836, "step": 1968 }, { "epoch": 0.36, "learning_rate": 3.504767701359302e-05, "loss": 0.0505, "step": 1976 }, { "epoch": 0.37, "learning_rate": 3.4966524650030435e-05, "loss": 0.0435, "step": 1984 }, { "epoch": 0.37, "learning_rate": 3.4885372286467844e-05, "loss": 0.078, "step": 1992 }, { "epoch": 0.37, "learning_rate": 3.480421992290526e-05, "loss": 0.0365, "step": 2000 }, { "epoch": 0.37, "eval_accuracy": 0.9795731602030597, "eval_f1": 0.9794998873620184, "eval_loss": 0.08517105132341385, "eval_precision": 0.986457242582897, "eval_recall": 0.9726399834807448, "eval_runtime": 319.0106, "eval_samples_per_second": 181.543, "eval_steps_per_second": 22.695, "step": 2000 }, { "epoch": 0.37, "learning_rate": 3.472306755934267e-05, "loss": 0.0755, "step": 2008 }, { "epoch": 0.37, "learning_rate": 3.464191519578008e-05, "loss": 0.0446, "step": 2016 }, { "epoch": 0.37, "learning_rate": 3.456076283221749e-05, "loss": 0.045, "step": 2024 }, { "epoch": 0.37, "learning_rate": 3.44796104686549e-05, "loss": 0.0446, "step": 2032 }, { "epoch": 0.38, "learning_rate": 3.439845810509231e-05, "loss": 0.0601, "step": 2040 }, { "epoch": 0.38, "learning_rate": 3.4317305741529726e-05, "loss": 0.0784, "step": 2048 }, { "epoch": 0.38, "learning_rate": 3.4236153377967135e-05, "loss": 0.0601, "step": 2056 }, { "epoch": 0.38, "learning_rate": 3.4155001014404544e-05, "loss": 0.0628, "step": 2064 }, { "epoch": 0.38, "learning_rate": 3.407384865084195e-05, "loss": 0.0805, "step": 2072 }, { "epoch": 0.38, "learning_rate": 3.399269628727937e-05, "loss": 0.0429, "step": 2080 }, { "epoch": 0.38, "learning_rate": 3.3911543923716785e-05, "loss": 0.0589, "step": 2088 }, { "epoch": 0.39, "learning_rate": 3.383039156015419e-05, "loss": 0.0564, "step": 2096 }, { "epoch": 0.39, "eval_accuracy": 0.9813171253928239, "eval_f1": 0.9813274427915645, "eval_loss": 0.05837487801909447, "eval_precision": 0.9841808300737305, "eval_recall": 0.9784905530508999, "eval_runtime": 319.719, "eval_samples_per_second": 181.14, "eval_steps_per_second": 22.645, "step": 2100 }, { "epoch": 0.39, "learning_rate": 3.37492391965916e-05, "loss": 0.0755, "step": 2104 }, { "epoch": 0.39, "learning_rate": 3.366808683302901e-05, "loss": 0.0578, "step": 2112 }, { "epoch": 0.39, "learning_rate": 3.358693446946643e-05, "loss": 0.076, "step": 2120 }, { "epoch": 0.39, "learning_rate": 3.3505782105903836e-05, "loss": 0.0767, "step": 2128 }, { "epoch": 0.39, "learning_rate": 3.3424629742341245e-05, "loss": 0.0393, "step": 2136 }, { "epoch": 0.39, "learning_rate": 3.334347737877866e-05, "loss": 0.057, "step": 2144 }, { "epoch": 0.4, "learning_rate": 3.326232501521607e-05, "loss": 0.0671, "step": 2152 }, { "epoch": 0.4, "learning_rate": 3.318117265165348e-05, "loss": 0.0338, "step": 2160 }, { "epoch": 0.4, "learning_rate": 3.3100020288090894e-05, "loss": 0.0637, "step": 2168 }, { "epoch": 0.4, "learning_rate": 3.30188679245283e-05, "loss": 0.0585, "step": 2176 }, { "epoch": 0.4, "learning_rate": 3.293771556096571e-05, "loss": 0.0699, "step": 2184 }, { "epoch": 0.4, "learning_rate": 3.285656319740313e-05, "loss": 0.0357, "step": 2192 }, { "epoch": 0.41, "learning_rate": 3.2775410833840536e-05, "loss": 0.0951, "step": 2200 }, { "epoch": 0.41, "eval_accuracy": 0.9820250716579756, "eval_f1": 0.9820328276290582, "eval_loss": 0.05581849440932274, "eval_precision": 0.9850079634374351, "eval_recall": 0.9790756100079154, "eval_runtime": 315.2538, "eval_samples_per_second": 183.706, "eval_steps_per_second": 22.966, "step": 2200 }, { "epoch": 0.41, "learning_rate": 3.269425847027795e-05, "loss": 0.0712, "step": 2208 }, { "epoch": 0.41, "learning_rate": 3.2613106106715354e-05, "loss": 0.0666, "step": 2216 }, { "epoch": 0.41, "learning_rate": 3.253195374315277e-05, "loss": 0.0806, "step": 2224 }, { "epoch": 0.41, "learning_rate": 3.2450801379590186e-05, "loss": 0.0511, "step": 2232 }, { "epoch": 0.41, "learning_rate": 3.2369649016027595e-05, "loss": 0.0523, "step": 2240 }, { "epoch": 0.41, "learning_rate": 3.2288496652465004e-05, "loss": 0.0738, "step": 2248 }, { "epoch": 0.42, "learning_rate": 3.220734428890241e-05, "loss": 0.0555, "step": 2256 }, { "epoch": 0.42, "learning_rate": 3.212619192533983e-05, "loss": 0.0723, "step": 2264 }, { "epoch": 0.42, "learning_rate": 3.204503956177724e-05, "loss": 0.0603, "step": 2272 }, { "epoch": 0.42, "learning_rate": 3.1963887198214646e-05, "loss": 0.0477, "step": 2280 }, { "epoch": 0.42, "learning_rate": 3.188273483465206e-05, "loss": 0.0724, "step": 2288 }, { "epoch": 0.42, "learning_rate": 3.180158247108947e-05, "loss": 0.0909, "step": 2296 }, { "epoch": 0.42, "eval_accuracy": 0.9805573781814414, "eval_f1": 0.9807237990892596, "eval_loss": 0.05231529101729393, "eval_precision": 0.9757127771911299, "eval_recall": 0.9857865574560347, "eval_runtime": 314.1871, "eval_samples_per_second": 184.33, "eval_steps_per_second": 23.044, "step": 2300 }, { "epoch": 0.42, "learning_rate": 3.172043010752688e-05, "loss": 0.0451, "step": 2304 }, { "epoch": 0.43, "learning_rate": 3.1639277743964295e-05, "loss": 0.0539, "step": 2312 }, { "epoch": 0.43, "learning_rate": 3.1558125380401704e-05, "loss": 0.0669, "step": 2320 }, { "epoch": 0.43, "learning_rate": 3.147697301683912e-05, "loss": 0.0439, "step": 2328 }, { "epoch": 0.43, "learning_rate": 3.139582065327653e-05, "loss": 0.0849, "step": 2336 }, { "epoch": 0.43, "learning_rate": 3.131466828971394e-05, "loss": 0.0798, "step": 2344 }, { "epoch": 0.43, "learning_rate": 3.123351592615135e-05, "loss": 0.0699, "step": 2352 }, { "epoch": 0.43, "learning_rate": 3.115236356258876e-05, "loss": 0.0707, "step": 2360 }, { "epoch": 0.44, "learning_rate": 3.107121119902617e-05, "loss": 0.0432, "step": 2368 }, { "epoch": 0.44, "learning_rate": 3.099005883546359e-05, "loss": 0.074, "step": 2376 }, { "epoch": 0.44, "learning_rate": 3.0908906471900996e-05, "loss": 0.091, "step": 2384 }, { "epoch": 0.44, "learning_rate": 3.0827754108338405e-05, "loss": 0.0482, "step": 2392 }, { "epoch": 0.44, "learning_rate": 3.0746601744775814e-05, "loss": 0.0673, "step": 2400 }, { "epoch": 0.44, "eval_accuracy": 0.9821804744966675, "eval_f1": 0.9821866261608035, "eval_loss": 0.05170663446187973, "eval_precision": 0.9852477750458842, "eval_recall": 0.9791444402381526, "eval_runtime": 314.3647, "eval_samples_per_second": 184.226, "eval_steps_per_second": 23.031, "step": 2400 }, { "epoch": 0.44, "learning_rate": 3.066544938121323e-05, "loss": 0.0329, "step": 2408 }, { "epoch": 0.44, "learning_rate": 3.0584297017650645e-05, "loss": 0.0723, "step": 2416 }, { "epoch": 0.45, "learning_rate": 3.050314465408805e-05, "loss": 0.0806, "step": 2424 }, { "epoch": 0.45, "learning_rate": 3.0421992290525463e-05, "loss": 0.0854, "step": 2432 }, { "epoch": 0.45, "learning_rate": 3.0340839926962872e-05, "loss": 0.09, "step": 2440 }, { "epoch": 0.45, "learning_rate": 3.0259687563400284e-05, "loss": 0.0537, "step": 2448 }, { "epoch": 0.45, "learning_rate": 3.01785351998377e-05, "loss": 0.0342, "step": 2456 }, { "epoch": 0.45, "learning_rate": 3.0097382836275105e-05, "loss": 0.0874, "step": 2464 }, { "epoch": 0.46, "learning_rate": 3.001623047271252e-05, "loss": 0.0435, "step": 2472 }, { "epoch": 0.46, "learning_rate": 2.9935078109149927e-05, "loss": 0.08, "step": 2480 }, { "epoch": 0.46, "learning_rate": 2.9853925745587342e-05, "loss": 0.0496, "step": 2488 }, { "epoch": 0.46, "learning_rate": 2.9772773382024755e-05, "loss": 0.0338, "step": 2496 }, { "epoch": 0.46, "eval_accuracy": 0.9790551507407536, "eval_f1": 0.9788532278028625, "eval_loss": 0.07079575955867767, "eval_precision": 0.9918739400791408, "eval_recall": 0.9661699418384555, "eval_runtime": 321.7416, "eval_samples_per_second": 180.002, "eval_steps_per_second": 22.503, "step": 2500 }, { "epoch": 0.46, "learning_rate": 2.9691621018462163e-05, "loss": 0.1099, "step": 2504 }, { "epoch": 0.46, "learning_rate": 2.9610468654899576e-05, "loss": 0.0828, "step": 2512 }, { "epoch": 0.46, "learning_rate": 2.9529316291336988e-05, "loss": 0.0722, "step": 2520 }, { "epoch": 0.47, "learning_rate": 2.9448163927774397e-05, "loss": 0.0693, "step": 2528 }, { "epoch": 0.47, "learning_rate": 2.936701156421181e-05, "loss": 0.0606, "step": 2536 }, { "epoch": 0.47, "learning_rate": 2.9285859200649218e-05, "loss": 0.0794, "step": 2544 }, { "epoch": 0.47, "learning_rate": 2.920470683708663e-05, "loss": 0.0794, "step": 2552 }, { "epoch": 0.47, "learning_rate": 2.9123554473524046e-05, "loss": 0.0353, "step": 2560 }, { "epoch": 0.47, "learning_rate": 2.9042402109961452e-05, "loss": 0.087, "step": 2568 }, { "epoch": 0.47, "learning_rate": 2.8961249746398867e-05, "loss": 0.0708, "step": 2576 }, { "epoch": 0.48, "learning_rate": 2.8880097382836273e-05, "loss": 0.102, "step": 2584 }, { "epoch": 0.48, "learning_rate": 2.879894501927369e-05, "loss": 0.0967, "step": 2592 }, { "epoch": 0.48, "learning_rate": 2.87177926557111e-05, "loss": 0.0528, "step": 2600 }, { "epoch": 0.48, "eval_accuracy": 0.9754981524329178, "eval_f1": 0.9751056999000018, "eval_loss": 0.06617435812950134, "eval_precision": 0.994524763813341, "eval_recall": 0.956430464259903, "eval_runtime": 317.2016, "eval_samples_per_second": 182.578, "eval_steps_per_second": 22.825, "step": 2600 }, { "epoch": 0.48, "learning_rate": 2.863664029214851e-05, "loss": 0.0414, "step": 2608 }, { "epoch": 0.48, "learning_rate": 2.8555487928585922e-05, "loss": 0.0709, "step": 2616 }, { "epoch": 0.48, "learning_rate": 2.847433556502333e-05, "loss": 0.0676, "step": 2624 }, { "epoch": 0.48, "learning_rate": 2.8393183201460743e-05, "loss": 0.0444, "step": 2632 }, { "epoch": 0.49, "learning_rate": 2.8312030837898156e-05, "loss": 0.074, "step": 2640 }, { "epoch": 0.49, "learning_rate": 2.8230878474335565e-05, "loss": 0.0723, "step": 2648 }, { "epoch": 0.49, "learning_rate": 2.8149726110772977e-05, "loss": 0.0471, "step": 2656 }, { "epoch": 0.49, "learning_rate": 2.8068573747210393e-05, "loss": 0.0475, "step": 2664 }, { "epoch": 0.49, "learning_rate": 2.7987421383647798e-05, "loss": 0.0507, "step": 2672 }, { "epoch": 0.49, "learning_rate": 2.7906269020085214e-05, "loss": 0.0465, "step": 2680 }, { "epoch": 0.5, "learning_rate": 2.782511665652262e-05, "loss": 0.0358, "step": 2688 }, { "epoch": 0.5, "learning_rate": 2.7743964292960035e-05, "loss": 0.0662, "step": 2696 }, { "epoch": 0.5, "eval_accuracy": 0.9828020858514349, "eval_f1": 0.9828204774389403, "eval_loss": 0.05437169969081879, "eval_precision": 0.9851654621529099, "eval_recall": 0.9804866297277764, "eval_runtime": 318.7613, "eval_samples_per_second": 181.685, "eval_steps_per_second": 22.713, "step": 2700 }, { "epoch": 0.5, "learning_rate": 2.7662811929397447e-05, "loss": 0.0568, "step": 2704 }, { "epoch": 0.5, "learning_rate": 2.7581659565834856e-05, "loss": 0.0558, "step": 2712 }, { "epoch": 0.5, "learning_rate": 2.750050720227227e-05, "loss": 0.0535, "step": 2720 }, { "epoch": 0.5, "learning_rate": 2.7419354838709678e-05, "loss": 0.0641, "step": 2728 }, { "epoch": 0.5, "learning_rate": 2.733820247514709e-05, "loss": 0.0529, "step": 2736 }, { "epoch": 0.51, "learning_rate": 2.7257050111584502e-05, "loss": 0.0754, "step": 2744 }, { "epoch": 0.51, "learning_rate": 2.717589774802191e-05, "loss": 0.0746, "step": 2752 }, { "epoch": 0.51, "learning_rate": 2.7094745384459323e-05, "loss": 0.0752, "step": 2760 }, { "epoch": 0.51, "learning_rate": 2.7013593020896732e-05, "loss": 0.0733, "step": 2768 }, { "epoch": 0.51, "learning_rate": 2.6932440657334145e-05, "loss": 0.0418, "step": 2776 }, { "epoch": 0.51, "learning_rate": 2.685128829377156e-05, "loss": 0.0575, "step": 2784 }, { "epoch": 0.51, "learning_rate": 2.6770135930208966e-05, "loss": 0.0463, "step": 2792 }, { "epoch": 0.52, "learning_rate": 2.668898356664638e-05, "loss": 0.0606, "step": 2800 }, { "epoch": 0.52, "eval_accuracy": 0.9828711537797423, "eval_f1": 0.9828776581054958, "eval_loss": 0.052921637892723083, "eval_precision": 0.9859067142214065, "eval_recall": 0.9798671576556424, "eval_runtime": 324.5523, "eval_samples_per_second": 178.443, "eval_steps_per_second": 22.308, "step": 2800 }, { "epoch": 0.52, "learning_rate": 2.6607831203083787e-05, "loss": 0.0665, "step": 2808 }, { "epoch": 0.52, "learning_rate": 2.6526678839521203e-05, "loss": 0.0533, "step": 2816 }, { "epoch": 0.52, "learning_rate": 2.6445526475958615e-05, "loss": 0.0706, "step": 2824 }, { "epoch": 0.52, "learning_rate": 2.6364374112396024e-05, "loss": 0.0729, "step": 2832 }, { "epoch": 0.52, "learning_rate": 2.6283221748833436e-05, "loss": 0.0498, "step": 2840 }, { "epoch": 0.52, "learning_rate": 2.620206938527085e-05, "loss": 0.0438, "step": 2848 }, { "epoch": 0.53, "learning_rate": 2.6120917021708257e-05, "loss": 0.0599, "step": 2856 }, { "epoch": 0.53, "learning_rate": 2.603976465814567e-05, "loss": 0.0413, "step": 2864 }, { "epoch": 0.53, "learning_rate": 2.595861229458308e-05, "loss": 0.0323, "step": 2872 }, { "epoch": 0.53, "learning_rate": 2.587745993102049e-05, "loss": 0.0706, "step": 2880 }, { "epoch": 0.53, "learning_rate": 2.5796307567457907e-05, "loss": 0.116, "step": 2888 }, { "epoch": 0.53, "learning_rate": 2.5715155203895312e-05, "loss": 0.0585, "step": 2896 }, { "epoch": 0.53, "eval_accuracy": 0.9815415961598232, "eval_f1": 0.9814451599465398, "eval_loss": 0.0516064278781414, "eval_precision": 0.99005462949993, "eval_recall": 0.9729841346319303, "eval_runtime": 325.834, "eval_samples_per_second": 177.741, "eval_steps_per_second": 22.22, "step": 2900 }, { "epoch": 0.53, "learning_rate": 2.5634002840332728e-05, "loss": 0.0475, "step": 2904 }, { "epoch": 0.54, "learning_rate": 2.5552850476770133e-05, "loss": 0.0612, "step": 2912 }, { "epoch": 0.54, "learning_rate": 2.547169811320755e-05, "loss": 0.0558, "step": 2920 }, { "epoch": 0.54, "learning_rate": 2.539054574964496e-05, "loss": 0.0572, "step": 2928 }, { "epoch": 0.54, "learning_rate": 2.530939338608237e-05, "loss": 0.0581, "step": 2936 }, { "epoch": 0.54, "learning_rate": 2.5228241022519783e-05, "loss": 0.0487, "step": 2944 }, { "epoch": 0.54, "learning_rate": 2.514708865895719e-05, "loss": 0.0681, "step": 2952 }, { "epoch": 0.55, "learning_rate": 2.5065936295394604e-05, "loss": 0.0658, "step": 2960 }, { "epoch": 0.55, "learning_rate": 2.4984783931832016e-05, "loss": 0.0769, "step": 2968 }, { "epoch": 0.55, "learning_rate": 2.490363156826943e-05, "loss": 0.0736, "step": 2976 }, { "epoch": 0.55, "learning_rate": 2.4822479204706837e-05, "loss": 0.0446, "step": 2984 }, { "epoch": 0.55, "learning_rate": 2.474132684114425e-05, "loss": 0.0472, "step": 2992 }, { "epoch": 0.55, "learning_rate": 2.466017447758166e-05, "loss": 0.058, "step": 3000 }, { "epoch": 0.55, "eval_accuracy": 0.9817142659805919, "eval_f1": 0.981694352733747, "eval_loss": 0.061614930629730225, "eval_precision": 0.9861776759047024, "eval_recall": 0.9772516089066318, "eval_runtime": 317.9761, "eval_samples_per_second": 182.133, "eval_steps_per_second": 22.769, "step": 3000 }, { "epoch": 0.55, "learning_rate": 2.457902211401907e-05, "loss": 0.0432, "step": 3008 }, { "epoch": 0.56, "learning_rate": 2.4497869750456483e-05, "loss": 0.0392, "step": 3016 }, { "epoch": 0.56, "learning_rate": 2.4416717386893896e-05, "loss": 0.0581, "step": 3024 }, { "epoch": 0.56, "learning_rate": 2.4335565023331305e-05, "loss": 0.0642, "step": 3032 }, { "epoch": 0.56, "learning_rate": 2.4254412659768717e-05, "loss": 0.039, "step": 3040 }, { "epoch": 0.56, "learning_rate": 2.417326029620613e-05, "loss": 0.0361, "step": 3048 }, { "epoch": 0.56, "learning_rate": 2.409210793264354e-05, "loss": 0.0591, "step": 3056 }, { "epoch": 0.56, "learning_rate": 2.401095556908095e-05, "loss": 0.0819, "step": 3064 }, { "epoch": 0.57, "learning_rate": 2.3929803205518363e-05, "loss": 0.1084, "step": 3072 }, { "epoch": 0.57, "learning_rate": 2.384865084195577e-05, "loss": 0.0425, "step": 3080 }, { "epoch": 0.57, "learning_rate": 2.3767498478393184e-05, "loss": 0.0711, "step": 3088 }, { "epoch": 0.57, "learning_rate": 2.3686346114830596e-05, "loss": 0.0449, "step": 3096 }, { "epoch": 0.57, "eval_accuracy": 0.9834064302241254, "eval_f1": 0.9835052608091176, "eval_loss": 0.04805440455675125, "eval_precision": 0.9810299958909738, "eval_recall": 0.985993048146746, "eval_runtime": 326.0918, "eval_samples_per_second": 177.6, "eval_steps_per_second": 22.202, "step": 3100 }, { "epoch": 0.57, "learning_rate": 2.3605193751268005e-05, "loss": 0.0558, "step": 3104 }, { "epoch": 0.57, "learning_rate": 2.3524041387705417e-05, "loss": 0.0629, "step": 3112 }, { "epoch": 0.57, "learning_rate": 2.344288902414283e-05, "loss": 0.0392, "step": 3120 }, { "epoch": 0.58, "learning_rate": 2.3361736660580242e-05, "loss": 0.0564, "step": 3128 }, { "epoch": 0.58, "learning_rate": 2.328058429701765e-05, "loss": 0.0418, "step": 3136 }, { "epoch": 0.58, "learning_rate": 2.3199431933455063e-05, "loss": 0.059, "step": 3144 }, { "epoch": 0.58, "learning_rate": 2.3118279569892472e-05, "loss": 0.0511, "step": 3152 }, { "epoch": 0.58, "learning_rate": 2.3037127206329888e-05, "loss": 0.0383, "step": 3160 }, { "epoch": 0.58, "learning_rate": 2.2955974842767297e-05, "loss": 0.0831, "step": 3168 }, { "epoch": 0.58, "learning_rate": 2.287482247920471e-05, "loss": 0.0407, "step": 3176 }, { "epoch": 0.59, "learning_rate": 2.2793670115642118e-05, "loss": 0.0354, "step": 3184 }, { "epoch": 0.59, "learning_rate": 2.271251775207953e-05, "loss": 0.0524, "step": 3192 }, { "epoch": 0.59, "learning_rate": 2.2631365388516943e-05, "loss": 0.0257, "step": 3200 }, { "epoch": 0.59, "eval_accuracy": 0.9817315329626688, "eval_f1": 0.9816478751084129, "eval_loss": 0.06091897934675217, "eval_precision": 0.9896128423040604, "eval_recall": 0.9738100973947758, "eval_runtime": 313.118, "eval_samples_per_second": 184.959, "eval_steps_per_second": 23.122, "step": 3200 }, { "epoch": 0.59, "learning_rate": 2.255021302495435e-05, "loss": 0.0473, "step": 3208 }, { "epoch": 0.59, "learning_rate": 2.2469060661391764e-05, "loss": 0.0779, "step": 3216 }, { "epoch": 0.59, "learning_rate": 2.2387908297829173e-05, "loss": 0.0436, "step": 3224 }, { "epoch": 0.6, "learning_rate": 2.230675593426659e-05, "loss": 0.0541, "step": 3232 }, { "epoch": 0.6, "learning_rate": 2.2225603570703997e-05, "loss": 0.0576, "step": 3240 }, { "epoch": 0.6, "learning_rate": 2.214445120714141e-05, "loss": 0.0649, "step": 3248 }, { "epoch": 0.6, "learning_rate": 2.206329884357882e-05, "loss": 0.0404, "step": 3256 }, { "epoch": 0.6, "learning_rate": 2.198214648001623e-05, "loss": 0.0351, "step": 3264 }, { "epoch": 0.6, "learning_rate": 2.1900994116453643e-05, "loss": 0.0364, "step": 3272 }, { "epoch": 0.6, "learning_rate": 2.1819841752891055e-05, "loss": 0.0483, "step": 3280 }, { "epoch": 0.61, "learning_rate": 2.1738689389328464e-05, "loss": 0.0562, "step": 3288 }, { "epoch": 0.61, "learning_rate": 2.1657537025765877e-05, "loss": 0.0627, "step": 3296 }, { "epoch": 0.61, "eval_accuracy": 0.9826121490485893, "eval_f1": 0.9827441438045136, "eval_loss": 0.04961591958999634, "eval_precision": 0.9786689419795221, "eval_recall": 0.98685342602471, "eval_runtime": 320.6595, "eval_samples_per_second": 180.609, "eval_steps_per_second": 22.578, "step": 3300 }, { "epoch": 0.61, "learning_rate": 2.157638466220329e-05, "loss": 0.0339, "step": 3304 }, { "epoch": 0.61, "learning_rate": 2.1495232298640698e-05, "loss": 0.0618, "step": 3312 }, { "epoch": 0.61, "learning_rate": 2.141407993507811e-05, "loss": 0.074, "step": 3320 }, { "epoch": 0.61, "learning_rate": 2.133292757151552e-05, "loss": 0.0781, "step": 3328 }, { "epoch": 0.61, "learning_rate": 2.125177520795293e-05, "loss": 0.059, "step": 3336 }, { "epoch": 0.62, "learning_rate": 2.1170622844390344e-05, "loss": 0.0411, "step": 3344 }, { "epoch": 0.62, "learning_rate": 2.1089470480827756e-05, "loss": 0.0604, "step": 3352 }, { "epoch": 0.62, "learning_rate": 2.1008318117265165e-05, "loss": 0.0602, "step": 3360 }, { "epoch": 0.62, "learning_rate": 2.0927165753702577e-05, "loss": 0.035, "step": 3368 }, { "epoch": 0.62, "learning_rate": 2.084601339013999e-05, "loss": 0.0545, "step": 3376 }, { "epoch": 0.62, "learning_rate": 2.0764861026577402e-05, "loss": 0.05, "step": 3384 }, { "epoch": 0.62, "learning_rate": 2.068370866301481e-05, "loss": 0.0676, "step": 3392 }, { "epoch": 0.63, "learning_rate": 2.0602556299452223e-05, "loss": 0.0501, "step": 3400 }, { "epoch": 0.63, "eval_accuracy": 0.9837863038298166, "eval_f1": 0.9838000103514312, "eval_loss": 0.04516952857375145, "eval_precision": 0.9863696118452916, "eval_recall": 0.9812437622603848, "eval_runtime": 311.4407, "eval_samples_per_second": 185.955, "eval_steps_per_second": 23.247, "step": 3400 } ], "max_steps": 5429, "num_train_epochs": 1, "total_flos": 5.7252965646336e+16, "trial_name": null, "trial_params": null }