{
  "best_metric": 0.9870401337792643,
  "best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-2030",
  "epoch": 9.98769987699877,
  "eval_steps": 500,
  "global_step": 2030,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 3.303891658782959,
      "learning_rate": 0.004975369458128079,
      "loss": 1.8368,
      "step": 10
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.481412649154663,
      "learning_rate": 0.004950738916256157,
      "loss": 1.2897,
      "step": 20
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0582990646362305,
      "learning_rate": 0.0049261083743842365,
      "loss": 1.0672,
      "step": 30
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.254044532775879,
      "learning_rate": 0.004901477832512316,
      "loss": 0.9723,
      "step": 40
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3351266384124756,
      "learning_rate": 0.004876847290640395,
      "loss": 0.917,
      "step": 50
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9788981676101685,
      "learning_rate": 0.004852216748768473,
      "loss": 0.8483,
      "step": 60
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.460303544998169,
      "learning_rate": 0.004827586206896552,
      "loss": 0.7848,
      "step": 70
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.4298815727233887,
      "learning_rate": 0.004802955665024631,
      "loss": 0.8819,
      "step": 80
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.531933069229126,
      "learning_rate": 0.004778325123152709,
      "loss": 0.7758,
      "step": 90
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.608288049697876,
      "learning_rate": 0.004753694581280788,
      "loss": 0.7678,
      "step": 100
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9291895627975464,
      "learning_rate": 0.004729064039408867,
      "loss": 0.7848,
      "step": 110
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8717544078826904,
      "learning_rate": 0.004704433497536946,
      "loss": 0.7742,
      "step": 120
    },
    {
      "epoch": 0.64,
      "grad_norm": 4.476926803588867,
      "learning_rate": 0.004679802955665025,
      "loss": 0.8906,
      "step": 130
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5952930450439453,
      "learning_rate": 0.004655172413793103,
      "loss": 0.8464,
      "step": 140
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8514671325683594,
      "learning_rate": 0.004630541871921182,
      "loss": 0.9079,
      "step": 150
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.962122917175293,
      "learning_rate": 0.004605911330049261,
      "loss": 0.81,
      "step": 160
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9573622941970825,
      "learning_rate": 0.00458128078817734,
      "loss": 0.8099,
      "step": 170
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8094934225082397,
      "learning_rate": 0.004559113300492611,
      "loss": 0.8105,
      "step": 180
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7324118614196777,
      "learning_rate": 0.00453448275862069,
      "loss": 0.8123,
      "step": 190
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.355945348739624,
      "learning_rate": 0.004509852216748769,
      "loss": 0.7947,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8975752508361204,
      "eval_f1": 0.863152897342088,
      "eval_loss": 0.3122749328613281,
      "eval_precision": 0.909035520710901,
      "eval_recall": 0.8450098410817735,
      "eval_runtime": 12.4051,
      "eval_samples_per_second": 192.824,
      "eval_steps_per_second": 12.092,
      "step": 203
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4632035493850708,
      "learning_rate": 0.004485221674876847,
      "loss": 0.8797,
      "step": 210
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.434492349624634,
      "learning_rate": 0.004460591133004926,
      "loss": 0.6601,
      "step": 220
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9131174087524414,
      "learning_rate": 0.004435960591133005,
      "loss": 0.7093,
      "step": 230
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.491714358329773,
      "learning_rate": 0.004411330049261084,
      "loss": 0.7247,
      "step": 240
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9442235231399536,
      "learning_rate": 0.004386699507389163,
      "loss": 0.7182,
      "step": 250
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9666441679000854,
      "learning_rate": 0.004362068965517241,
      "loss": 0.733,
      "step": 260
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.89641273021698,
      "learning_rate": 0.00433743842364532,
      "loss": 0.6678,
      "step": 270
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.3621200323104858,
      "learning_rate": 0.004312807881773399,
      "loss": 0.7066,
      "step": 280
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8440511226654053,
      "learning_rate": 0.004288177339901478,
      "loss": 0.673,
      "step": 290
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.712856650352478,
      "learning_rate": 0.0042635467980295565,
      "loss": 0.7424,
      "step": 300
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.674052357673645,
      "learning_rate": 0.004238916256157636,
      "loss": 0.6104,
      "step": 310
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.5503976345062256,
      "learning_rate": 0.004214285714285715,
      "loss": 0.6868,
      "step": 320
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.0976303815841675,
      "learning_rate": 0.004189655172413793,
      "loss": 0.703,
      "step": 330
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.5332385301589966,
      "learning_rate": 0.004165024630541872,
      "loss": 0.6599,
      "step": 340
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.4168181419372559,
      "learning_rate": 0.004140394088669951,
      "loss": 0.7273,
      "step": 350
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.414102792739868,
      "learning_rate": 0.00411576354679803,
      "loss": 0.6551,
      "step": 360
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9595593214035034,
      "learning_rate": 0.004091133004926108,
      "loss": 0.7608,
      "step": 370
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.0985585451126099,
      "learning_rate": 0.0040665024630541875,
      "loss": 0.6946,
      "step": 380
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.288224458694458,
      "learning_rate": 0.004041871921182267,
      "loss": 0.7381,
      "step": 390
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.385890245437622,
      "learning_rate": 0.004017241379310345,
      "loss": 0.6703,
      "step": 400
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9607023411371237,
      "eval_f1": 0.9535454483827537,
      "eval_loss": 0.14003877341747284,
      "eval_precision": 0.9589551276899428,
      "eval_recall": 0.9543228146341872,
      "eval_runtime": 12.5652,
      "eval_samples_per_second": 190.367,
      "eval_steps_per_second": 11.938,
      "step": 406
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.190495729446411,
      "learning_rate": 0.003992610837438423,
      "loss": 0.6558,
      "step": 410
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.319401979446411,
      "learning_rate": 0.003967980295566502,
      "loss": 0.618,
      "step": 420
    },
    {
      "epoch": 2.12,
      "grad_norm": 1.5089792013168335,
      "learning_rate": 0.003943349753694581,
      "loss": 0.6622,
      "step": 430
    },
    {
      "epoch": 2.16,
      "grad_norm": 1.2998738288879395,
      "learning_rate": 0.00391871921182266,
      "loss": 0.6039,
      "step": 440
    },
    {
      "epoch": 2.21,
      "grad_norm": 1.5582971572875977,
      "learning_rate": 0.003894088669950739,
      "loss": 0.585,
      "step": 450
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.1443846225738525,
      "learning_rate": 0.0038694581280788176,
      "loss": 0.6739,
      "step": 460
    },
    {
      "epoch": 2.31,
      "grad_norm": 1.1868767738342285,
      "learning_rate": 0.0038448275862068967,
      "loss": 0.6598,
      "step": 470
    },
    {
      "epoch": 2.36,
      "grad_norm": 1.5321897268295288,
      "learning_rate": 0.0038201970443349754,
      "loss": 0.6058,
      "step": 480
    },
    {
      "epoch": 2.41,
      "grad_norm": 1.2971707582473755,
      "learning_rate": 0.0037955665024630545,
      "loss": 0.6025,
      "step": 490
    },
    {
      "epoch": 2.46,
      "grad_norm": 1.0405155420303345,
      "learning_rate": 0.003770935960591133,
      "loss": 0.6544,
      "step": 500
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.27400541305542,
      "learning_rate": 0.0037463054187192118,
      "loss": 0.639,
      "step": 510
    },
    {
      "epoch": 2.56,
      "grad_norm": 1.8367363214492798,
      "learning_rate": 0.003721674876847291,
      "loss": 0.6922,
      "step": 520
    },
    {
      "epoch": 2.61,
      "grad_norm": 1.6862225532531738,
      "learning_rate": 0.0036970443349753695,
      "loss": 0.6698,
      "step": 530
    },
    {
      "epoch": 2.66,
      "grad_norm": 1.1783074140548706,
      "learning_rate": 0.0036724137931034486,
      "loss": 0.648,
      "step": 540
    },
    {
      "epoch": 2.71,
      "grad_norm": 1.327495813369751,
      "learning_rate": 0.0036477832512315273,
      "loss": 0.5485,
      "step": 550
    },
    {
      "epoch": 2.76,
      "grad_norm": 1.2704271078109741,
      "learning_rate": 0.0036231527093596064,
      "loss": 0.6125,
      "step": 560
    },
    {
      "epoch": 2.8,
      "grad_norm": 1.412690281867981,
      "learning_rate": 0.003598522167487685,
      "loss": 0.5872,
      "step": 570
    },
    {
      "epoch": 2.85,
      "grad_norm": 1.5771632194519043,
      "learning_rate": 0.0035738916256157637,
      "loss": 0.541,
      "step": 580
    },
    {
      "epoch": 2.9,
      "grad_norm": 1.2916010618209839,
      "learning_rate": 0.0035492610837438428,
      "loss": 0.6637,
      "step": 590
    },
    {
      "epoch": 2.95,
      "grad_norm": 1.0224180221557617,
      "learning_rate": 0.003524630541871921,
      "loss": 0.5941,
      "step": 600
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9698996655518395,
      "eval_f1": 0.9649463741495116,
      "eval_loss": 0.11816500872373581,
      "eval_precision": 0.9646998653209304,
      "eval_recall": 0.9681395759866063,
      "eval_runtime": 12.5344,
      "eval_samples_per_second": 190.834,
      "eval_steps_per_second": 11.967,
      "step": 609
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.8427205085754395,
      "learning_rate": 0.0034999999999999996,
      "loss": 0.6179,
      "step": 610
    },
    {
      "epoch": 3.05,
      "grad_norm": 1.1675821542739868,
      "learning_rate": 0.0034753694581280787,
      "loss": 0.6263,
      "step": 620
    },
    {
      "epoch": 3.1,
      "grad_norm": 1.6908611059188843,
      "learning_rate": 0.0034507389162561574,
      "loss": 0.7175,
      "step": 630
    },
    {
      "epoch": 3.15,
      "grad_norm": 1.5712032318115234,
      "learning_rate": 0.0034261083743842365,
      "loss": 0.6474,
      "step": 640
    },
    {
      "epoch": 3.2,
      "grad_norm": 1.8690963983535767,
      "learning_rate": 0.003401477832512315,
      "loss": 0.5849,
      "step": 650
    },
    {
      "epoch": 3.25,
      "grad_norm": 1.6917773485183716,
      "learning_rate": 0.0033768472906403942,
      "loss": 0.5954,
      "step": 660
    },
    {
      "epoch": 3.3,
      "grad_norm": 1.4844752550125122,
      "learning_rate": 0.003352216748768473,
      "loss": 0.6284,
      "step": 670
    },
    {
      "epoch": 3.35,
      "grad_norm": 1.444581389427185,
      "learning_rate": 0.003327586206896552,
      "loss": 0.5529,
      "step": 680
    },
    {
      "epoch": 3.39,
      "grad_norm": 1.3921010494232178,
      "learning_rate": 0.0033029556650246306,
      "loss": 0.6022,
      "step": 690
    },
    {
      "epoch": 3.44,
      "grad_norm": 1.3489701747894287,
      "learning_rate": 0.0032783251231527093,
      "loss": 0.6314,
      "step": 700
    },
    {
      "epoch": 3.49,
      "grad_norm": 1.7876464128494263,
      "learning_rate": 0.0032536945812807884,
      "loss": 0.5322,
      "step": 710
    },
    {
      "epoch": 3.54,
      "grad_norm": 1.2738828659057617,
      "learning_rate": 0.003229064039408867,
      "loss": 0.5869,
      "step": 720
    },
    {
      "epoch": 3.59,
      "grad_norm": 1.5368149280548096,
      "learning_rate": 0.003204433497536946,
      "loss": 0.5659,
      "step": 730
    },
    {
      "epoch": 3.64,
      "grad_norm": 1.890324354171753,
      "learning_rate": 0.0031798029556650248,
      "loss": 0.5506,
      "step": 740
    },
    {
      "epoch": 3.69,
      "grad_norm": 1.553797721862793,
      "learning_rate": 0.003155172413793104,
      "loss": 0.6645,
      "step": 750
    },
    {
      "epoch": 3.74,
      "grad_norm": 1.3873250484466553,
      "learning_rate": 0.0031305418719211825,
      "loss": 0.5013,
      "step": 760
    },
    {
      "epoch": 3.79,
      "grad_norm": 1.6613869667053223,
      "learning_rate": 0.0031059113300492616,
      "loss": 0.4614,
      "step": 770
    },
    {
      "epoch": 3.84,
      "grad_norm": 1.3628942966461182,
      "learning_rate": 0.00308128078817734,
      "loss": 0.5874,
      "step": 780
    },
    {
      "epoch": 3.89,
      "grad_norm": 1.5102113485336304,
      "learning_rate": 0.0030566502463054185,
      "loss": 0.5564,
      "step": 790
    },
    {
      "epoch": 3.94,
      "grad_norm": 1.4211273193359375,
      "learning_rate": 0.0030320197044334976,
      "loss": 0.6018,
      "step": 800
    },
    {
      "epoch": 3.99,
      "grad_norm": 1.8017326593399048,
      "learning_rate": 0.0030073891625615762,
      "loss": 0.5837,
      "step": 810
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.967809364548495,
      "eval_f1": 0.9551154966770515,
      "eval_loss": 0.10157252848148346,
      "eval_precision": 0.9557533496633682,
      "eval_recall": 0.9586131276038764,
      "eval_runtime": 12.6536,
      "eval_samples_per_second": 189.038,
      "eval_steps_per_second": 11.854,
      "step": 813
    },
    {
      "epoch": 4.03,
      "grad_norm": 1.3088445663452148,
      "learning_rate": 0.002982758620689655,
      "loss": 0.5057,
      "step": 820
    },
    {
      "epoch": 4.08,
      "grad_norm": 1.200412631034851,
      "learning_rate": 0.002958128078817734,
      "loss": 0.5485,
      "step": 830
    },
    {
      "epoch": 4.13,
      "grad_norm": 1.6468169689178467,
      "learning_rate": 0.0029334975369458127,
      "loss": 0.5171,
      "step": 840
    },
    {
      "epoch": 4.18,
      "grad_norm": 1.0748703479766846,
      "learning_rate": 0.0029088669950738917,
      "loss": 0.5664,
      "step": 850
    },
    {
      "epoch": 4.23,
      "grad_norm": 1.396888256072998,
      "learning_rate": 0.0028842364532019704,
      "loss": 0.4641,
      "step": 860
    },
    {
      "epoch": 4.28,
      "grad_norm": 1.0845372676849365,
      "learning_rate": 0.0028596059113300495,
      "loss": 0.5789,
      "step": 870
    },
    {
      "epoch": 4.33,
      "grad_norm": 1.4134384393692017,
      "learning_rate": 0.002834975369458128,
      "loss": 0.4361,
      "step": 880
    },
    {
      "epoch": 4.38,
      "grad_norm": 0.7656651735305786,
      "learning_rate": 0.002810344827586207,
      "loss": 0.5938,
      "step": 890
    },
    {
      "epoch": 4.43,
      "grad_norm": 1.459712028503418,
      "learning_rate": 0.002785714285714286,
      "loss": 0.5146,
      "step": 900
    },
    {
      "epoch": 4.48,
      "grad_norm": 1.2046053409576416,
      "learning_rate": 0.0027610837438423646,
      "loss": 0.4882,
      "step": 910
    },
    {
      "epoch": 4.53,
      "grad_norm": 1.1301757097244263,
      "learning_rate": 0.0027364532019704436,
      "loss": 0.4728,
      "step": 920
    },
    {
      "epoch": 4.58,
      "grad_norm": 1.255055546760559,
      "learning_rate": 0.0027118226600985223,
      "loss": 0.4384,
      "step": 930
    },
    {
      "epoch": 4.62,
      "grad_norm": 1.3792164325714111,
      "learning_rate": 0.0026871921182266014,
      "loss": 0.5357,
      "step": 940
    },
    {
      "epoch": 4.67,
      "grad_norm": 1.3066402673721313,
      "learning_rate": 0.00266256157635468,
      "loss": 0.5361,
      "step": 950
    },
    {
      "epoch": 4.72,
      "grad_norm": 1.2377945184707642,
      "learning_rate": 0.002637931034482759,
      "loss": 0.5334,
      "step": 960
    },
    {
      "epoch": 4.77,
      "grad_norm": 1.3673447370529175,
      "learning_rate": 0.0026133004926108374,
      "loss": 0.4896,
      "step": 970
    },
    {
      "epoch": 4.82,
      "grad_norm": 1.8711413145065308,
      "learning_rate": 0.002588669950738916,
      "loss": 0.4729,
      "step": 980
    },
    {
      "epoch": 4.87,
      "grad_norm": 1.1367807388305664,
      "learning_rate": 0.002564039408866995,
      "loss": 0.483,
      "step": 990
    },
    {
      "epoch": 4.92,
      "grad_norm": 1.5432896614074707,
      "learning_rate": 0.0025394088669950738,
      "loss": 0.5477,
      "step": 1000
    },
    {
      "epoch": 4.97,
      "grad_norm": 1.0067399740219116,
      "learning_rate": 0.0025147783251231524,
      "loss": 0.5193,
      "step": 1010
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9790969899665551,
      "eval_f1": 0.9675306891159455,
      "eval_loss": 0.08001040667295456,
      "eval_precision": 0.9700832487729493,
      "eval_recall": 0.9684116828701858,
      "eval_runtime": 12.5434,
      "eval_samples_per_second": 190.698,
      "eval_steps_per_second": 11.958,
      "step": 1016
    },
    {
      "epoch": 5.02,
      "grad_norm": 1.8359023332595825,
      "learning_rate": 0.0024901477832512315,
      "loss": 0.5087,
      "step": 1020
    },
    {
      "epoch": 5.07,
      "grad_norm": 1.3961881399154663,
      "learning_rate": 0.00246551724137931,
      "loss": 0.4565,
      "step": 1030
    },
    {
      "epoch": 5.12,
      "grad_norm": 1.9095091819763184,
      "learning_rate": 0.0024408866995073893,
      "loss": 0.4432,
      "step": 1040
    },
    {
      "epoch": 5.17,
      "grad_norm": 1.2952779531478882,
      "learning_rate": 0.002416256157635468,
      "loss": 0.4392,
      "step": 1050
    },
    {
      "epoch": 5.22,
      "grad_norm": 1.309617042541504,
      "learning_rate": 0.002391625615763547,
      "loss": 0.5255,
      "step": 1060
    },
    {
      "epoch": 5.26,
      "grad_norm": 1.497014045715332,
      "learning_rate": 0.0023669950738916257,
      "loss": 0.5122,
      "step": 1070
    },
    {
      "epoch": 5.31,
      "grad_norm": 1.3211737871170044,
      "learning_rate": 0.0023423645320197048,
      "loss": 0.5529,
      "step": 1080
    },
    {
      "epoch": 5.36,
      "grad_norm": 0.9946479797363281,
      "learning_rate": 0.0023177339901477834,
      "loss": 0.4708,
      "step": 1090
    },
    {
      "epoch": 5.41,
      "grad_norm": 0.8456437587738037,
      "learning_rate": 0.002293103448275862,
      "loss": 0.4935,
      "step": 1100
    },
    {
      "epoch": 5.46,
      "grad_norm": 1.495175838470459,
      "learning_rate": 0.0022684729064039407,
      "loss": 0.4975,
      "step": 1110
    },
    {
      "epoch": 5.51,
      "grad_norm": 1.6447827816009521,
      "learning_rate": 0.00224384236453202,
      "loss": 0.4138,
      "step": 1120
    },
    {
      "epoch": 5.56,
      "grad_norm": 0.8438058495521545,
      "learning_rate": 0.0022192118226600985,
      "loss": 0.4502,
      "step": 1130
    },
    {
      "epoch": 5.61,
      "grad_norm": 1.3904708623886108,
      "learning_rate": 0.0021945812807881776,
      "loss": 0.4681,
      "step": 1140
    },
    {
      "epoch": 5.66,
      "grad_norm": 1.498844861984253,
      "learning_rate": 0.0021699507389162562,
      "loss": 0.4637,
      "step": 1150
    },
    {
      "epoch": 5.71,
      "grad_norm": 1.1716539859771729,
      "learning_rate": 0.002145320197044335,
      "loss": 0.5183,
      "step": 1160
    },
    {
      "epoch": 5.76,
      "grad_norm": 0.7125697135925293,
      "learning_rate": 0.002120689655172414,
      "loss": 0.4307,
      "step": 1170
    },
    {
      "epoch": 5.81,
      "grad_norm": 1.341647744178772,
      "learning_rate": 0.0020960591133004926,
      "loss": 0.4988,
      "step": 1180
    },
    {
      "epoch": 5.85,
      "grad_norm": 1.4662394523620605,
      "learning_rate": 0.0020714285714285717,
      "loss": 0.4398,
      "step": 1190
    },
    {
      "epoch": 5.9,
      "grad_norm": 1.7114837169647217,
      "learning_rate": 0.0020467980295566504,
      "loss": 0.4488,
      "step": 1200
    },
    {
      "epoch": 5.95,
      "grad_norm": 1.0667368173599243,
      "learning_rate": 0.002022167487684729,
      "loss": 0.5513,
      "step": 1210
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9862040133779264,
      "eval_f1": 0.9840490701292556,
      "eval_loss": 0.0578995905816555,
      "eval_precision": 0.9830589209967975,
      "eval_recall": 0.985517150491058,
      "eval_runtime": 12.5187,
      "eval_samples_per_second": 191.074,
      "eval_steps_per_second": 11.982,
      "step": 1219
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.1651352643966675,
      "learning_rate": 0.0019975369458128077,
      "loss": 0.4321,
      "step": 1220
    },
    {
      "epoch": 6.05,
      "grad_norm": 1.0694313049316406,
      "learning_rate": 0.0019729064039408868,
      "loss": 0.4343,
      "step": 1230
    },
    {
      "epoch": 6.1,
      "grad_norm": 1.5686174631118774,
      "learning_rate": 0.0019482758620689657,
      "loss": 0.367,
      "step": 1240
    },
    {
      "epoch": 6.15,
      "grad_norm": 0.7148666977882385,
      "learning_rate": 0.0019236453201970443,
      "loss": 0.4364,
      "step": 1250
    },
    {
      "epoch": 6.2,
      "grad_norm": 1.4920200109481812,
      "learning_rate": 0.0018990147783251232,
      "loss": 0.4814,
      "step": 1260
    },
    {
      "epoch": 6.25,
      "grad_norm": 1.0870678424835205,
      "learning_rate": 0.001874384236453202,
      "loss": 0.4145,
      "step": 1270
    },
    {
      "epoch": 6.3,
      "grad_norm": 1.0466927289962769,
      "learning_rate": 0.001849753694581281,
      "loss": 0.4296,
      "step": 1280
    },
    {
      "epoch": 6.35,
      "grad_norm": 0.9908223748207092,
      "learning_rate": 0.0018251231527093596,
      "loss": 0.4183,
      "step": 1290
    },
    {
      "epoch": 6.4,
      "grad_norm": 0.6582946181297302,
      "learning_rate": 0.0018004926108374385,
      "loss": 0.4099,
      "step": 1300
    },
    {
      "epoch": 6.45,
      "grad_norm": 1.3454304933547974,
      "learning_rate": 0.0017758620689655171,
      "loss": 0.367,
      "step": 1310
    },
    {
      "epoch": 6.49,
      "grad_norm": 1.3359636068344116,
      "learning_rate": 0.001751231527093596,
      "loss": 0.4025,
      "step": 1320
    },
    {
      "epoch": 6.54,
      "grad_norm": 1.2285734415054321,
      "learning_rate": 0.0017266009852216749,
      "loss": 0.4675,
      "step": 1330
    },
    {
      "epoch": 6.59,
      "grad_norm": 0.9923570156097412,
      "learning_rate": 0.0017019704433497537,
      "loss": 0.3958,
      "step": 1340
    },
    {
      "epoch": 6.64,
      "grad_norm": 0.8746837973594666,
      "learning_rate": 0.0016773399014778326,
      "loss": 0.4365,
      "step": 1350
    },
    {
      "epoch": 6.69,
      "grad_norm": 0.8892514705657959,
      "learning_rate": 0.0016527093596059115,
      "loss": 0.4296,
      "step": 1360
    },
    {
      "epoch": 6.74,
      "grad_norm": 1.2088005542755127,
      "learning_rate": 0.0016280788177339904,
      "loss": 0.3881,
      "step": 1370
    },
    {
      "epoch": 6.79,
      "grad_norm": 1.0085664987564087,
      "learning_rate": 0.0016034482758620688,
      "loss": 0.4745,
      "step": 1380
    },
    {
      "epoch": 6.84,
      "grad_norm": 1.0228571891784668,
      "learning_rate": 0.0015788177339901477,
      "loss": 0.3763,
      "step": 1390
    },
    {
      "epoch": 6.89,
      "grad_norm": 0.8100888133049011,
      "learning_rate": 0.0015541871921182266,
      "loss": 0.4198,
      "step": 1400
    },
    {
      "epoch": 6.94,
      "grad_norm": 1.2440354824066162,
      "learning_rate": 0.0015295566502463054,
      "loss": 0.4113,
      "step": 1410
    },
    {
      "epoch": 6.99,
      "grad_norm": 0.6661180257797241,
      "learning_rate": 0.0015049261083743843,
      "loss": 0.4343,
      "step": 1420
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9832775919732442,
      "eval_f1": 0.9834794316662396,
      "eval_loss": 0.07752905040979385,
      "eval_precision": 0.985796063365073,
      "eval_recall": 0.981752686688599,
      "eval_runtime": 12.5648,
      "eval_samples_per_second": 190.373,
      "eval_steps_per_second": 11.938,
      "step": 1422
    },
    {
      "epoch": 7.04,
      "grad_norm": 0.591243326663971,
      "learning_rate": 0.0014802955665024632,
      "loss": 0.3291,
      "step": 1430
    },
    {
      "epoch": 7.08,
      "grad_norm": 0.8764331936836243,
      "learning_rate": 0.001455665024630542,
      "loss": 0.3704,
      "step": 1440
    },
    {
      "epoch": 7.13,
      "grad_norm": 1.115868330001831,
      "learning_rate": 0.0014310344827586207,
      "loss": 0.3625,
      "step": 1450
    },
    {
      "epoch": 7.18,
      "grad_norm": 1.1736584901809692,
      "learning_rate": 0.0014064039408866996,
      "loss": 0.3571,
      "step": 1460
    },
    {
      "epoch": 7.23,
      "grad_norm": 0.9778345227241516,
      "learning_rate": 0.0013817733990147782,
      "loss": 0.3594,
      "step": 1470
    },
    {
      "epoch": 7.28,
      "grad_norm": 1.1396944522857666,
      "learning_rate": 0.0013571428571428571,
      "loss": 0.3615,
      "step": 1480
    },
    {
      "epoch": 7.33,
      "grad_norm": 1.2598211765289307,
      "learning_rate": 0.001332512315270936,
      "loss": 0.3802,
      "step": 1490
    },
    {
      "epoch": 7.38,
      "grad_norm": 1.1756126880645752,
      "learning_rate": 0.0013078817733990149,
      "loss": 0.4429,
      "step": 1500
    },
    {
      "epoch": 7.43,
      "grad_norm": 0.9109674096107483,
      "learning_rate": 0.0012832512315270935,
      "loss": 0.3578,
      "step": 1510
    },
    {
      "epoch": 7.48,
      "grad_norm": 0.7428516745567322,
      "learning_rate": 0.0012586206896551724,
      "loss": 0.3705,
      "step": 1520
    },
    {
      "epoch": 7.53,
      "grad_norm": 1.3957030773162842,
      "learning_rate": 0.0012339901477832513,
      "loss": 0.3769,
      "step": 1530
    },
    {
      "epoch": 7.58,
      "grad_norm": 1.0507686138153076,
      "learning_rate": 0.00120935960591133,
      "loss": 0.3525,
      "step": 1540
    },
    {
      "epoch": 7.63,
      "grad_norm": 0.8914185762405396,
      "learning_rate": 0.0011847290640394088,
      "loss": 0.4804,
      "step": 1550
    },
    {
      "epoch": 7.68,
      "grad_norm": 0.8193994760513306,
      "learning_rate": 0.0011600985221674877,
      "loss": 0.298,
      "step": 1560
    },
    {
      "epoch": 7.72,
      "grad_norm": 0.9263984560966492,
      "learning_rate": 0.0011354679802955665,
      "loss": 0.3142,
      "step": 1570
    },
    {
      "epoch": 7.77,
      "grad_norm": 1.8249924182891846,
      "learning_rate": 0.0011108374384236454,
      "loss": 0.4135,
      "step": 1580
    },
    {
      "epoch": 7.82,
      "grad_norm": 0.659723162651062,
      "learning_rate": 0.001086206896551724,
      "loss": 0.3844,
      "step": 1590
    },
    {
      "epoch": 7.87,
      "grad_norm": 0.7200958132743835,
      "learning_rate": 0.001061576354679803,
      "loss": 0.3627,
      "step": 1600
    },
    {
      "epoch": 7.92,
      "grad_norm": 1.055242657661438,
      "learning_rate": 0.0010369458128078818,
      "loss": 0.3522,
      "step": 1610
    },
    {
      "epoch": 7.97,
      "grad_norm": 1.0147466659545898,
      "learning_rate": 0.0010123152709359607,
      "loss": 0.3942,
      "step": 1620
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9832775919732442,
      "eval_f1": 0.9816549969154011,
      "eval_loss": 0.07823298126459122,
      "eval_precision": 0.9812899149698605,
      "eval_recall": 0.9827399465830431,
      "eval_runtime": 12.6409,
      "eval_samples_per_second": 189.227,
      "eval_steps_per_second": 11.866,
      "step": 1626
    },
    {
      "epoch": 8.02,
      "grad_norm": 1.2122970819473267,
      "learning_rate": 0.0009876847290640393,
      "loss": 0.2658,
      "step": 1630
    },
    {
      "epoch": 8.07,
      "grad_norm": 0.6683902740478516,
      "learning_rate": 0.0009630541871921182,
      "loss": 0.3499,
      "step": 1640
    },
    {
      "epoch": 8.12,
      "grad_norm": 1.0198993682861328,
      "learning_rate": 0.0009384236453201971,
      "loss": 0.4031,
      "step": 1650
    },
    {
      "epoch": 8.17,
      "grad_norm": 1.2388522624969482,
      "learning_rate": 0.0009137931034482759,
      "loss": 0.3164,
      "step": 1660
    },
    {
      "epoch": 8.22,
      "grad_norm": 1.2226431369781494,
      "learning_rate": 0.0008891625615763547,
      "loss": 0.299,
      "step": 1670
    },
    {
      "epoch": 8.27,
      "grad_norm": 1.0016721487045288,
      "learning_rate": 0.0008645320197044335,
      "loss": 0.3315,
      "step": 1680
    },
    {
      "epoch": 8.31,
      "grad_norm": 1.0766950845718384,
      "learning_rate": 0.0008399014778325123,
      "loss": 0.286,
      "step": 1690
    },
    {
      "epoch": 8.36,
      "grad_norm": 1.8925853967666626,
      "learning_rate": 0.0008152709359605911,
      "loss": 0.3745,
      "step": 1700
    },
    {
      "epoch": 8.41,
      "grad_norm": 0.8409897685050964,
      "learning_rate": 0.00079064039408867,
      "loss": 0.3276,
      "step": 1710
    },
    {
      "epoch": 8.46,
      "grad_norm": 1.1315199136734009,
      "learning_rate": 0.0007660098522167489,
      "loss": 0.3275,
      "step": 1720
    },
    {
      "epoch": 8.51,
      "grad_norm": 1.05160391330719,
      "learning_rate": 0.0007413793103448275,
      "loss": 0.3072,
      "step": 1730
    },
    {
      "epoch": 8.56,
      "grad_norm": 1.0058565139770508,
      "learning_rate": 0.0007167487684729064,
      "loss": 0.3413,
      "step": 1740
    },
    {
      "epoch": 8.61,
      "grad_norm": 0.9650315046310425,
      "learning_rate": 0.0006921182266009853,
      "loss": 0.365,
      "step": 1750
    },
    {
      "epoch": 8.66,
      "grad_norm": 0.6396649479866028,
      "learning_rate": 0.0006674876847290641,
      "loss": 0.3271,
      "step": 1760
    },
    {
      "epoch": 8.71,
      "grad_norm": 0.7196962833404541,
      "learning_rate": 0.0006428571428571428,
      "loss": 0.3683,
      "step": 1770
    },
    {
      "epoch": 8.76,
      "grad_norm": 0.8004360198974609,
      "learning_rate": 0.0006182266009852217,
      "loss": 0.3687,
      "step": 1780
    },
    {
      "epoch": 8.81,
      "grad_norm": 0.9620378613471985,
      "learning_rate": 0.0005935960591133005,
      "loss": 0.3298,
      "step": 1790
    },
    {
      "epoch": 8.86,
      "grad_norm": 0.7279284596443176,
      "learning_rate": 0.0005689655172413793,
      "loss": 0.3109,
      "step": 1800
    },
    {
      "epoch": 8.91,
      "grad_norm": 1.2889859676361084,
      "learning_rate": 0.0005443349753694581,
      "loss": 0.4205,
      "step": 1810
    },
    {
      "epoch": 8.95,
      "grad_norm": 0.8951707482337952,
      "learning_rate": 0.000519704433497537,
      "loss": 0.2971,
      "step": 1820
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9862040133779264,
      "eval_f1": 0.9872927641297526,
      "eval_loss": 0.08386523276567459,
      "eval_precision": 0.9884096313888006,
      "eval_recall": 0.9865651085702777,
      "eval_runtime": 12.5898,
      "eval_samples_per_second": 189.995,
      "eval_steps_per_second": 11.914,
      "step": 1829
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.6368119120597839,
      "learning_rate": 0.0004950738916256157,
      "loss": 0.3692,
      "step": 1830
    },
    {
      "epoch": 9.05,
      "grad_norm": 0.5298008322715759,
      "learning_rate": 0.0004704433497536946,
      "loss": 0.3112,
      "step": 1840
    },
    {
      "epoch": 9.1,
      "grad_norm": 0.5324183702468872,
      "learning_rate": 0.0004458128078817734,
      "loss": 0.3389,
      "step": 1850
    },
    {
      "epoch": 9.15,
      "grad_norm": 0.6283653974533081,
      "learning_rate": 0.0004211822660098522,
      "loss": 0.3328,
      "step": 1860
    },
    {
      "epoch": 9.2,
      "grad_norm": 0.9130664467811584,
      "learning_rate": 0.0003965517241379311,
      "loss": 0.2774,
      "step": 1870
    },
    {
      "epoch": 9.25,
      "grad_norm": 0.569354772567749,
      "learning_rate": 0.00037192118226600984,
      "loss": 0.3055,
      "step": 1880
    },
    {
      "epoch": 9.3,
      "grad_norm": 0.91834557056427,
      "learning_rate": 0.0003472906403940887,
      "loss": 0.3108,
      "step": 1890
    },
    {
      "epoch": 9.35,
      "grad_norm": 1.2413830757141113,
      "learning_rate": 0.0003226600985221675,
      "loss": 0.2896,
      "step": 1900
    },
    {
      "epoch": 9.4,
      "grad_norm": 0.7163951396942139,
      "learning_rate": 0.0002980295566502463,
      "loss": 0.2883,
      "step": 1910
    },
    {
      "epoch": 9.45,
      "grad_norm": 1.121543288230896,
      "learning_rate": 0.0002733990147783251,
      "loss": 0.3061,
      "step": 1920
    },
    {
      "epoch": 9.5,
      "grad_norm": 0.8933872580528259,
      "learning_rate": 0.00024876847290640394,
      "loss": 0.3087,
      "step": 1930
    },
    {
      "epoch": 9.54,
      "grad_norm": 0.7040281295776367,
      "learning_rate": 0.00022413793103448276,
      "loss": 0.2723,
      "step": 1940
    },
    {
      "epoch": 9.59,
      "grad_norm": 0.3641883432865143,
      "learning_rate": 0.00019950738916256158,
      "loss": 0.2397,
      "step": 1950
    },
    {
      "epoch": 9.64,
      "grad_norm": 1.092432975769043,
      "learning_rate": 0.0001748768472906404,
      "loss": 0.2763,
      "step": 1960
    },
    {
      "epoch": 9.69,
      "grad_norm": 0.6717754602432251,
      "learning_rate": 0.00015024630541871922,
      "loss": 0.2921,
      "step": 1970
    },
    {
      "epoch": 9.74,
      "grad_norm": 0.8007289171218872,
      "learning_rate": 0.00012561576354679804,
      "loss": 0.3365,
      "step": 1980
    },
    {
      "epoch": 9.79,
      "grad_norm": 0.9652357697486877,
      "learning_rate": 0.00010098522167487686,
      "loss": 0.2998,
      "step": 1990
    },
    {
      "epoch": 9.84,
      "grad_norm": 0.6942909955978394,
      "learning_rate": 7.635467980295568e-05,
      "loss": 0.2917,
      "step": 2000
    },
    {
      "epoch": 9.89,
      "grad_norm": 0.5785544514656067,
      "learning_rate": 5.172413793103448e-05,
      "loss": 0.2662,
      "step": 2010
    },
    {
      "epoch": 9.94,
      "grad_norm": 0.7549653649330139,
      "learning_rate": 2.70935960591133e-05,
      "loss": 0.2989,
      "step": 2020
    },
    {
      "epoch": 9.99,
      "grad_norm": 1.3010107278823853,
      "learning_rate": 2.463054187192118e-06,
      "loss": 0.3242,
      "step": 2030
    },
    {
      "epoch": 9.99,
      "eval_accuracy": 0.9870401337792643,
      "eval_f1": 0.9868360981525407,
      "eval_loss": 0.07449871301651001,
      "eval_precision": 0.9876698762890729,
      "eval_recall": 0.9863397416476135,
      "eval_runtime": 12.5861,
      "eval_samples_per_second": 190.05,
      "eval_steps_per_second": 11.918,
      "step": 2030
    },
    {
      "epoch": 9.99,
      "step": 2030,
      "total_flos": 1.0133154899356189e+19,
      "train_loss": 0.5248493043072705,
      "train_runtime": 1518.4893,
      "train_samples_per_second": 85.611,
      "train_steps_per_second": 1.337
    }
  ],
  "logging_steps": 10,
  "max_steps": 2030,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 1.0133154899356189e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}