organc-deit-base-finetuned / trainer_state.json
{
"best_metric": 0.9870401337792643,
"best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-2030",
"epoch": 9.98769987699877,
"eval_steps": 500,
"global_step": 2030,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": 3.303891658782959,
"learning_rate": 0.004975369458128079,
"loss": 1.8368,
"step": 10
},
{
"epoch": 0.1,
"grad_norm": 2.481412649154663,
"learning_rate": 0.004950738916256157,
"loss": 1.2897,
"step": 20
},
{
"epoch": 0.15,
"grad_norm": 2.0582990646362305,
"learning_rate": 0.0049261083743842365,
"loss": 1.0672,
"step": 30
},
{
"epoch": 0.2,
"grad_norm": 2.254044532775879,
"learning_rate": 0.004901477832512316,
"loss": 0.9723,
"step": 40
},
{
"epoch": 0.25,
"grad_norm": 2.3351266384124756,
"learning_rate": 0.004876847290640395,
"loss": 0.917,
"step": 50
},
{
"epoch": 0.3,
"grad_norm": 1.9788981676101685,
"learning_rate": 0.004852216748768473,
"loss": 0.8483,
"step": 60
},
{
"epoch": 0.34,
"grad_norm": 1.460303544998169,
"learning_rate": 0.004827586206896552,
"loss": 0.7848,
"step": 70
},
{
"epoch": 0.39,
"grad_norm": 3.4298815727233887,
"learning_rate": 0.004802955665024631,
"loss": 0.8819,
"step": 80
},
{
"epoch": 0.44,
"grad_norm": 1.531933069229126,
"learning_rate": 0.004778325123152709,
"loss": 0.7758,
"step": 90
},
{
"epoch": 0.49,
"grad_norm": 1.608288049697876,
"learning_rate": 0.004753694581280788,
"loss": 0.7678,
"step": 100
},
{
"epoch": 0.54,
"grad_norm": 1.9291895627975464,
"learning_rate": 0.004729064039408867,
"loss": 0.7848,
"step": 110
},
{
"epoch": 0.59,
"grad_norm": 1.8717544078826904,
"learning_rate": 0.004704433497536946,
"loss": 0.7742,
"step": 120
},
{
"epoch": 0.64,
"grad_norm": 4.476926803588867,
"learning_rate": 0.004679802955665025,
"loss": 0.8906,
"step": 130
},
{
"epoch": 0.69,
"grad_norm": 2.5952930450439453,
"learning_rate": 0.004655172413793103,
"loss": 0.8464,
"step": 140
},
{
"epoch": 0.74,
"grad_norm": 1.8514671325683594,
"learning_rate": 0.004630541871921182,
"loss": 0.9079,
"step": 150
},
{
"epoch": 0.79,
"grad_norm": 1.962122917175293,
"learning_rate": 0.004605911330049261,
"loss": 0.81,
"step": 160
},
{
"epoch": 0.84,
"grad_norm": 1.9573622941970825,
"learning_rate": 0.00458128078817734,
"loss": 0.8099,
"step": 170
},
{
"epoch": 0.89,
"grad_norm": 1.8094934225082397,
"learning_rate": 0.004559113300492611,
"loss": 0.8105,
"step": 180
},
{
"epoch": 0.93,
"grad_norm": 2.7324118614196777,
"learning_rate": 0.00453448275862069,
"loss": 0.8123,
"step": 190
},
{
"epoch": 0.98,
"grad_norm": 2.355945348739624,
"learning_rate": 0.004509852216748769,
"loss": 0.7947,
"step": 200
},
{
"epoch": 1.0,
"eval_accuracy": 0.8975752508361204,
"eval_f1": 0.863152897342088,
"eval_loss": 0.3122749328613281,
"eval_precision": 0.909035520710901,
"eval_recall": 0.8450098410817735,
"eval_runtime": 12.4051,
"eval_samples_per_second": 192.824,
"eval_steps_per_second": 12.092,
"step": 203
},
{
"epoch": 1.03,
"grad_norm": 1.4632035493850708,
"learning_rate": 0.004485221674876847,
"loss": 0.8797,
"step": 210
},
{
"epoch": 1.08,
"grad_norm": 2.434492349624634,
"learning_rate": 0.004460591133004926,
"loss": 0.6601,
"step": 220
},
{
"epoch": 1.13,
"grad_norm": 1.9131174087524414,
"learning_rate": 0.004435960591133005,
"loss": 0.7093,
"step": 230
},
{
"epoch": 1.18,
"grad_norm": 1.491714358329773,
"learning_rate": 0.004411330049261084,
"loss": 0.7247,
"step": 240
},
{
"epoch": 1.23,
"grad_norm": 1.9442235231399536,
"learning_rate": 0.004386699507389163,
"loss": 0.7182,
"step": 250
},
{
"epoch": 1.28,
"grad_norm": 1.9666441679000854,
"learning_rate": 0.004362068965517241,
"loss": 0.733,
"step": 260
},
{
"epoch": 1.33,
"grad_norm": 1.89641273021698,
"learning_rate": 0.00433743842364532,
"loss": 0.6678,
"step": 270
},
{
"epoch": 1.38,
"grad_norm": 1.3621200323104858,
"learning_rate": 0.004312807881773399,
"loss": 0.7066,
"step": 280
},
{
"epoch": 1.43,
"grad_norm": 1.8440511226654053,
"learning_rate": 0.004288177339901478,
"loss": 0.673,
"step": 290
},
{
"epoch": 1.48,
"grad_norm": 1.712856650352478,
"learning_rate": 0.0042635467980295565,
"loss": 0.7424,
"step": 300
},
{
"epoch": 1.53,
"grad_norm": 1.674052357673645,
"learning_rate": 0.004238916256157636,
"loss": 0.6104,
"step": 310
},
{
"epoch": 1.57,
"grad_norm": 1.5503976345062256,
"learning_rate": 0.004214285714285715,
"loss": 0.6868,
"step": 320
},
{
"epoch": 1.62,
"grad_norm": 1.0976303815841675,
"learning_rate": 0.004189655172413793,
"loss": 0.703,
"step": 330
},
{
"epoch": 1.67,
"grad_norm": 1.5332385301589966,
"learning_rate": 0.004165024630541872,
"loss": 0.6599,
"step": 340
},
{
"epoch": 1.72,
"grad_norm": 1.4168181419372559,
"learning_rate": 0.004140394088669951,
"loss": 0.7273,
"step": 350
},
{
"epoch": 1.77,
"grad_norm": 2.414102792739868,
"learning_rate": 0.00411576354679803,
"loss": 0.6551,
"step": 360
},
{
"epoch": 1.82,
"grad_norm": 1.9595593214035034,
"learning_rate": 0.004091133004926108,
"loss": 0.7608,
"step": 370
},
{
"epoch": 1.87,
"grad_norm": 1.0985585451126099,
"learning_rate": 0.0040665024630541875,
"loss": 0.6946,
"step": 380
},
{
"epoch": 1.92,
"grad_norm": 2.288224458694458,
"learning_rate": 0.004041871921182267,
"loss": 0.7381,
"step": 390
},
{
"epoch": 1.97,
"grad_norm": 1.385890245437622,
"learning_rate": 0.004017241379310345,
"loss": 0.6703,
"step": 400
},
{
"epoch": 2.0,
"eval_accuracy": 0.9607023411371237,
"eval_f1": 0.9535454483827537,
"eval_loss": 0.14003877341747284,
"eval_precision": 0.9589551276899428,
"eval_recall": 0.9543228146341872,
"eval_runtime": 12.5652,
"eval_samples_per_second": 190.367,
"eval_steps_per_second": 11.938,
"step": 406
},
{
"epoch": 2.02,
"grad_norm": 2.190495729446411,
"learning_rate": 0.003992610837438423,
"loss": 0.6558,
"step": 410
},
{
"epoch": 2.07,
"grad_norm": 2.319401979446411,
"learning_rate": 0.003967980295566502,
"loss": 0.618,
"step": 420
},
{
"epoch": 2.12,
"grad_norm": 1.5089792013168335,
"learning_rate": 0.003943349753694581,
"loss": 0.6622,
"step": 430
},
{
"epoch": 2.16,
"grad_norm": 1.2998738288879395,
"learning_rate": 0.00391871921182266,
"loss": 0.6039,
"step": 440
},
{
"epoch": 2.21,
"grad_norm": 1.5582971572875977,
"learning_rate": 0.003894088669950739,
"loss": 0.585,
"step": 450
},
{
"epoch": 2.26,
"grad_norm": 2.1443846225738525,
"learning_rate": 0.0038694581280788176,
"loss": 0.6739,
"step": 460
},
{
"epoch": 2.31,
"grad_norm": 1.1868767738342285,
"learning_rate": 0.0038448275862068967,
"loss": 0.6598,
"step": 470
},
{
"epoch": 2.36,
"grad_norm": 1.5321897268295288,
"learning_rate": 0.0038201970443349754,
"loss": 0.6058,
"step": 480
},
{
"epoch": 2.41,
"grad_norm": 1.2971707582473755,
"learning_rate": 0.0037955665024630545,
"loss": 0.6025,
"step": 490
},
{
"epoch": 2.46,
"grad_norm": 1.0405155420303345,
"learning_rate": 0.003770935960591133,
"loss": 0.6544,
"step": 500
},
{
"epoch": 2.51,
"grad_norm": 2.27400541305542,
"learning_rate": 0.0037463054187192118,
"loss": 0.639,
"step": 510
},
{
"epoch": 2.56,
"grad_norm": 1.8367363214492798,
"learning_rate": 0.003721674876847291,
"loss": 0.6922,
"step": 520
},
{
"epoch": 2.61,
"grad_norm": 1.6862225532531738,
"learning_rate": 0.0036970443349753695,
"loss": 0.6698,
"step": 530
},
{
"epoch": 2.66,
"grad_norm": 1.1783074140548706,
"learning_rate": 0.0036724137931034486,
"loss": 0.648,
"step": 540
},
{
"epoch": 2.71,
"grad_norm": 1.327495813369751,
"learning_rate": 0.0036477832512315273,
"loss": 0.5485,
"step": 550
},
{
"epoch": 2.76,
"grad_norm": 1.2704271078109741,
"learning_rate": 0.0036231527093596064,
"loss": 0.6125,
"step": 560
},
{
"epoch": 2.8,
"grad_norm": 1.412690281867981,
"learning_rate": 0.003598522167487685,
"loss": 0.5872,
"step": 570
},
{
"epoch": 2.85,
"grad_norm": 1.5771632194519043,
"learning_rate": 0.0035738916256157637,
"loss": 0.541,
"step": 580
},
{
"epoch": 2.9,
"grad_norm": 1.2916010618209839,
"learning_rate": 0.0035492610837438428,
"loss": 0.6637,
"step": 590
},
{
"epoch": 2.95,
"grad_norm": 1.0224180221557617,
"learning_rate": 0.003524630541871921,
"loss": 0.5941,
"step": 600
},
{
"epoch": 3.0,
"eval_accuracy": 0.9698996655518395,
"eval_f1": 0.9649463741495116,
"eval_loss": 0.11816500872373581,
"eval_precision": 0.9646998653209304,
"eval_recall": 0.9681395759866063,
"eval_runtime": 12.5344,
"eval_samples_per_second": 190.834,
"eval_steps_per_second": 11.967,
"step": 609
},
{
"epoch": 3.0,
"grad_norm": 1.8427205085754395,
"learning_rate": 0.0034999999999999996,
"loss": 0.6179,
"step": 610
},
{
"epoch": 3.05,
"grad_norm": 1.1675821542739868,
"learning_rate": 0.0034753694581280787,
"loss": 0.6263,
"step": 620
},
{
"epoch": 3.1,
"grad_norm": 1.6908611059188843,
"learning_rate": 0.0034507389162561574,
"loss": 0.7175,
"step": 630
},
{
"epoch": 3.15,
"grad_norm": 1.5712032318115234,
"learning_rate": 0.0034261083743842365,
"loss": 0.6474,
"step": 640
},
{
"epoch": 3.2,
"grad_norm": 1.8690963983535767,
"learning_rate": 0.003401477832512315,
"loss": 0.5849,
"step": 650
},
{
"epoch": 3.25,
"grad_norm": 1.6917773485183716,
"learning_rate": 0.0033768472906403942,
"loss": 0.5954,
"step": 660
},
{
"epoch": 3.3,
"grad_norm": 1.4844752550125122,
"learning_rate": 0.003352216748768473,
"loss": 0.6284,
"step": 670
},
{
"epoch": 3.35,
"grad_norm": 1.444581389427185,
"learning_rate": 0.003327586206896552,
"loss": 0.5529,
"step": 680
},
{
"epoch": 3.39,
"grad_norm": 1.3921010494232178,
"learning_rate": 0.0033029556650246306,
"loss": 0.6022,
"step": 690
},
{
"epoch": 3.44,
"grad_norm": 1.3489701747894287,
"learning_rate": 0.0032783251231527093,
"loss": 0.6314,
"step": 700
},
{
"epoch": 3.49,
"grad_norm": 1.7876464128494263,
"learning_rate": 0.0032536945812807884,
"loss": 0.5322,
"step": 710
},
{
"epoch": 3.54,
"grad_norm": 1.2738828659057617,
"learning_rate": 0.003229064039408867,
"loss": 0.5869,
"step": 720
},
{
"epoch": 3.59,
"grad_norm": 1.5368149280548096,
"learning_rate": 0.003204433497536946,
"loss": 0.5659,
"step": 730
},
{
"epoch": 3.64,
"grad_norm": 1.890324354171753,
"learning_rate": 0.0031798029556650248,
"loss": 0.5506,
"step": 740
},
{
"epoch": 3.69,
"grad_norm": 1.553797721862793,
"learning_rate": 0.003155172413793104,
"loss": 0.6645,
"step": 750
},
{
"epoch": 3.74,
"grad_norm": 1.3873250484466553,
"learning_rate": 0.0031305418719211825,
"loss": 0.5013,
"step": 760
},
{
"epoch": 3.79,
"grad_norm": 1.6613869667053223,
"learning_rate": 0.0031059113300492616,
"loss": 0.4614,
"step": 770
},
{
"epoch": 3.84,
"grad_norm": 1.3628942966461182,
"learning_rate": 0.00308128078817734,
"loss": 0.5874,
"step": 780
},
{
"epoch": 3.89,
"grad_norm": 1.5102113485336304,
"learning_rate": 0.0030566502463054185,
"loss": 0.5564,
"step": 790
},
{
"epoch": 3.94,
"grad_norm": 1.4211273193359375,
"learning_rate": 0.0030320197044334976,
"loss": 0.6018,
"step": 800
},
{
"epoch": 3.99,
"grad_norm": 1.8017326593399048,
"learning_rate": 0.0030073891625615762,
"loss": 0.5837,
"step": 810
},
{
"epoch": 4.0,
"eval_accuracy": 0.967809364548495,
"eval_f1": 0.9551154966770515,
"eval_loss": 0.10157252848148346,
"eval_precision": 0.9557533496633682,
"eval_recall": 0.9586131276038764,
"eval_runtime": 12.6536,
"eval_samples_per_second": 189.038,
"eval_steps_per_second": 11.854,
"step": 813
},
{
"epoch": 4.03,
"grad_norm": 1.3088445663452148,
"learning_rate": 0.002982758620689655,
"loss": 0.5057,
"step": 820
},
{
"epoch": 4.08,
"grad_norm": 1.200412631034851,
"learning_rate": 0.002958128078817734,
"loss": 0.5485,
"step": 830
},
{
"epoch": 4.13,
"grad_norm": 1.6468169689178467,
"learning_rate": 0.0029334975369458127,
"loss": 0.5171,
"step": 840
},
{
"epoch": 4.18,
"grad_norm": 1.0748703479766846,
"learning_rate": 0.0029088669950738917,
"loss": 0.5664,
"step": 850
},
{
"epoch": 4.23,
"grad_norm": 1.396888256072998,
"learning_rate": 0.0028842364532019704,
"loss": 0.4641,
"step": 860
},
{
"epoch": 4.28,
"grad_norm": 1.0845372676849365,
"learning_rate": 0.0028596059113300495,
"loss": 0.5789,
"step": 870
},
{
"epoch": 4.33,
"grad_norm": 1.4134384393692017,
"learning_rate": 0.002834975369458128,
"loss": 0.4361,
"step": 880
},
{
"epoch": 4.38,
"grad_norm": 0.7656651735305786,
"learning_rate": 0.002810344827586207,
"loss": 0.5938,
"step": 890
},
{
"epoch": 4.43,
"grad_norm": 1.459712028503418,
"learning_rate": 0.002785714285714286,
"loss": 0.5146,
"step": 900
},
{
"epoch": 4.48,
"grad_norm": 1.2046053409576416,
"learning_rate": 0.0027610837438423646,
"loss": 0.4882,
"step": 910
},
{
"epoch": 4.53,
"grad_norm": 1.1301757097244263,
"learning_rate": 0.0027364532019704436,
"loss": 0.4728,
"step": 920
},
{
"epoch": 4.58,
"grad_norm": 1.255055546760559,
"learning_rate": 0.0027118226600985223,
"loss": 0.4384,
"step": 930
},
{
"epoch": 4.62,
"grad_norm": 1.3792164325714111,
"learning_rate": 0.0026871921182266014,
"loss": 0.5357,
"step": 940
},
{
"epoch": 4.67,
"grad_norm": 1.3066402673721313,
"learning_rate": 0.00266256157635468,
"loss": 0.5361,
"step": 950
},
{
"epoch": 4.72,
"grad_norm": 1.2377945184707642,
"learning_rate": 0.002637931034482759,
"loss": 0.5334,
"step": 960
},
{
"epoch": 4.77,
"grad_norm": 1.3673447370529175,
"learning_rate": 0.0026133004926108374,
"loss": 0.4896,
"step": 970
},
{
"epoch": 4.82,
"grad_norm": 1.8711413145065308,
"learning_rate": 0.002588669950738916,
"loss": 0.4729,
"step": 980
},
{
"epoch": 4.87,
"grad_norm": 1.1367807388305664,
"learning_rate": 0.002564039408866995,
"loss": 0.483,
"step": 990
},
{
"epoch": 4.92,
"grad_norm": 1.5432896614074707,
"learning_rate": 0.0025394088669950738,
"loss": 0.5477,
"step": 1000
},
{
"epoch": 4.97,
"grad_norm": 1.0067399740219116,
"learning_rate": 0.0025147783251231524,
"loss": 0.5193,
"step": 1010
},
{
"epoch": 5.0,
"eval_accuracy": 0.9790969899665551,
"eval_f1": 0.9675306891159455,
"eval_loss": 0.08001040667295456,
"eval_precision": 0.9700832487729493,
"eval_recall": 0.9684116828701858,
"eval_runtime": 12.5434,
"eval_samples_per_second": 190.698,
"eval_steps_per_second": 11.958,
"step": 1016
},
{
"epoch": 5.02,
"grad_norm": 1.8359023332595825,
"learning_rate": 0.0024901477832512315,
"loss": 0.5087,
"step": 1020
},
{
"epoch": 5.07,
"grad_norm": 1.3961881399154663,
"learning_rate": 0.00246551724137931,
"loss": 0.4565,
"step": 1030
},
{
"epoch": 5.12,
"grad_norm": 1.9095091819763184,
"learning_rate": 0.0024408866995073893,
"loss": 0.4432,
"step": 1040
},
{
"epoch": 5.17,
"grad_norm": 1.2952779531478882,
"learning_rate": 0.002416256157635468,
"loss": 0.4392,
"step": 1050
},
{
"epoch": 5.22,
"grad_norm": 1.309617042541504,
"learning_rate": 0.002391625615763547,
"loss": 0.5255,
"step": 1060
},
{
"epoch": 5.26,
"grad_norm": 1.497014045715332,
"learning_rate": 0.0023669950738916257,
"loss": 0.5122,
"step": 1070
},
{
"epoch": 5.31,
"grad_norm": 1.3211737871170044,
"learning_rate": 0.0023423645320197048,
"loss": 0.5529,
"step": 1080
},
{
"epoch": 5.36,
"grad_norm": 0.9946479797363281,
"learning_rate": 0.0023177339901477834,
"loss": 0.4708,
"step": 1090
},
{
"epoch": 5.41,
"grad_norm": 0.8456437587738037,
"learning_rate": 0.002293103448275862,
"loss": 0.4935,
"step": 1100
},
{
"epoch": 5.46,
"grad_norm": 1.495175838470459,
"learning_rate": 0.0022684729064039407,
"loss": 0.4975,
"step": 1110
},
{
"epoch": 5.51,
"grad_norm": 1.6447827816009521,
"learning_rate": 0.00224384236453202,
"loss": 0.4138,
"step": 1120
},
{
"epoch": 5.56,
"grad_norm": 0.8438058495521545,
"learning_rate": 0.0022192118226600985,
"loss": 0.4502,
"step": 1130
},
{
"epoch": 5.61,
"grad_norm": 1.3904708623886108,
"learning_rate": 0.0021945812807881776,
"loss": 0.4681,
"step": 1140
},
{
"epoch": 5.66,
"grad_norm": 1.498844861984253,
"learning_rate": 0.0021699507389162562,
"loss": 0.4637,
"step": 1150
},
{
"epoch": 5.71,
"grad_norm": 1.1716539859771729,
"learning_rate": 0.002145320197044335,
"loss": 0.5183,
"step": 1160
},
{
"epoch": 5.76,
"grad_norm": 0.7125697135925293,
"learning_rate": 0.002120689655172414,
"loss": 0.4307,
"step": 1170
},
{
"epoch": 5.81,
"grad_norm": 1.341647744178772,
"learning_rate": 0.0020960591133004926,
"loss": 0.4988,
"step": 1180
},
{
"epoch": 5.85,
"grad_norm": 1.4662394523620605,
"learning_rate": 0.0020714285714285717,
"loss": 0.4398,
"step": 1190
},
{
"epoch": 5.9,
"grad_norm": 1.7114837169647217,
"learning_rate": 0.0020467980295566504,
"loss": 0.4488,
"step": 1200
},
{
"epoch": 5.95,
"grad_norm": 1.0667368173599243,
"learning_rate": 0.002022167487684729,
"loss": 0.5513,
"step": 1210
},
{
"epoch": 6.0,
"eval_accuracy": 0.9862040133779264,
"eval_f1": 0.9840490701292556,
"eval_loss": 0.0578995905816555,
"eval_precision": 0.9830589209967975,
"eval_recall": 0.985517150491058,
"eval_runtime": 12.5187,
"eval_samples_per_second": 191.074,
"eval_steps_per_second": 11.982,
"step": 1219
},
{
"epoch": 6.0,
"grad_norm": 1.1651352643966675,
"learning_rate": 0.0019975369458128077,
"loss": 0.4321,
"step": 1220
},
{
"epoch": 6.05,
"grad_norm": 1.0694313049316406,
"learning_rate": 0.0019729064039408868,
"loss": 0.4343,
"step": 1230
},
{
"epoch": 6.1,
"grad_norm": 1.5686174631118774,
"learning_rate": 0.0019482758620689657,
"loss": 0.367,
"step": 1240
},
{
"epoch": 6.15,
"grad_norm": 0.7148666977882385,
"learning_rate": 0.0019236453201970443,
"loss": 0.4364,
"step": 1250
},
{
"epoch": 6.2,
"grad_norm": 1.4920200109481812,
"learning_rate": 0.0018990147783251232,
"loss": 0.4814,
"step": 1260
},
{
"epoch": 6.25,
"grad_norm": 1.0870678424835205,
"learning_rate": 0.001874384236453202,
"loss": 0.4145,
"step": 1270
},
{
"epoch": 6.3,
"grad_norm": 1.0466927289962769,
"learning_rate": 0.001849753694581281,
"loss": 0.4296,
"step": 1280
},
{
"epoch": 6.35,
"grad_norm": 0.9908223748207092,
"learning_rate": 0.0018251231527093596,
"loss": 0.4183,
"step": 1290
},
{
"epoch": 6.4,
"grad_norm": 0.6582946181297302,
"learning_rate": 0.0018004926108374385,
"loss": 0.4099,
"step": 1300
},
{
"epoch": 6.45,
"grad_norm": 1.3454304933547974,
"learning_rate": 0.0017758620689655171,
"loss": 0.367,
"step": 1310
},
{
"epoch": 6.49,
"grad_norm": 1.3359636068344116,
"learning_rate": 0.001751231527093596,
"loss": 0.4025,
"step": 1320
},
{
"epoch": 6.54,
"grad_norm": 1.2285734415054321,
"learning_rate": 0.0017266009852216749,
"loss": 0.4675,
"step": 1330
},
{
"epoch": 6.59,
"grad_norm": 0.9923570156097412,
"learning_rate": 0.0017019704433497537,
"loss": 0.3958,
"step": 1340
},
{
"epoch": 6.64,
"grad_norm": 0.8746837973594666,
"learning_rate": 0.0016773399014778326,
"loss": 0.4365,
"step": 1350
},
{
"epoch": 6.69,
"grad_norm": 0.8892514705657959,
"learning_rate": 0.0016527093596059115,
"loss": 0.4296,
"step": 1360
},
{
"epoch": 6.74,
"grad_norm": 1.2088005542755127,
"learning_rate": 0.0016280788177339904,
"loss": 0.3881,
"step": 1370
},
{
"epoch": 6.79,
"grad_norm": 1.0085664987564087,
"learning_rate": 0.0016034482758620688,
"loss": 0.4745,
"step": 1380
},
{
"epoch": 6.84,
"grad_norm": 1.0228571891784668,
"learning_rate": 0.0015788177339901477,
"loss": 0.3763,
"step": 1390
},
{
"epoch": 6.89,
"grad_norm": 0.8100888133049011,
"learning_rate": 0.0015541871921182266,
"loss": 0.4198,
"step": 1400
},
{
"epoch": 6.94,
"grad_norm": 1.2440354824066162,
"learning_rate": 0.0015295566502463054,
"loss": 0.4113,
"step": 1410
},
{
"epoch": 6.99,
"grad_norm": 0.6661180257797241,
"learning_rate": 0.0015049261083743843,
"loss": 0.4343,
"step": 1420
},
{
"epoch": 7.0,
"eval_accuracy": 0.9832775919732442,
"eval_f1": 0.9834794316662396,
"eval_loss": 0.07752905040979385,
"eval_precision": 0.985796063365073,
"eval_recall": 0.981752686688599,
"eval_runtime": 12.5648,
"eval_samples_per_second": 190.373,
"eval_steps_per_second": 11.938,
"step": 1422
},
{
"epoch": 7.04,
"grad_norm": 0.591243326663971,
"learning_rate": 0.0014802955665024632,
"loss": 0.3291,
"step": 1430
},
{
"epoch": 7.08,
"grad_norm": 0.8764331936836243,
"learning_rate": 0.001455665024630542,
"loss": 0.3704,
"step": 1440
},
{
"epoch": 7.13,
"grad_norm": 1.115868330001831,
"learning_rate": 0.0014310344827586207,
"loss": 0.3625,
"step": 1450
},
{
"epoch": 7.18,
"grad_norm": 1.1736584901809692,
"learning_rate": 0.0014064039408866996,
"loss": 0.3571,
"step": 1460
},
{
"epoch": 7.23,
"grad_norm": 0.9778345227241516,
"learning_rate": 0.0013817733990147782,
"loss": 0.3594,
"step": 1470
},
{
"epoch": 7.28,
"grad_norm": 1.1396944522857666,
"learning_rate": 0.0013571428571428571,
"loss": 0.3615,
"step": 1480
},
{
"epoch": 7.33,
"grad_norm": 1.2598211765289307,
"learning_rate": 0.001332512315270936,
"loss": 0.3802,
"step": 1490
},
{
"epoch": 7.38,
"grad_norm": 1.1756126880645752,
"learning_rate": 0.0013078817733990149,
"loss": 0.4429,
"step": 1500
},
{
"epoch": 7.43,
"grad_norm": 0.9109674096107483,
"learning_rate": 0.0012832512315270935,
"loss": 0.3578,
"step": 1510
},
{
"epoch": 7.48,
"grad_norm": 0.7428516745567322,
"learning_rate": 0.0012586206896551724,
"loss": 0.3705,
"step": 1520
},
{
"epoch": 7.53,
"grad_norm": 1.3957030773162842,
"learning_rate": 0.0012339901477832513,
"loss": 0.3769,
"step": 1530
},
{
"epoch": 7.58,
"grad_norm": 1.0507686138153076,
"learning_rate": 0.00120935960591133,
"loss": 0.3525,
"step": 1540
},
{
"epoch": 7.63,
"grad_norm": 0.8914185762405396,
"learning_rate": 0.0011847290640394088,
"loss": 0.4804,
"step": 1550
},
{
"epoch": 7.68,
"grad_norm": 0.8193994760513306,
"learning_rate": 0.0011600985221674877,
"loss": 0.298,
"step": 1560
},
{
"epoch": 7.72,
"grad_norm": 0.9263984560966492,
"learning_rate": 0.0011354679802955665,
"loss": 0.3142,
"step": 1570
},
{
"epoch": 7.77,
"grad_norm": 1.8249924182891846,
"learning_rate": 0.0011108374384236454,
"loss": 0.4135,
"step": 1580
},
{
"epoch": 7.82,
"grad_norm": 0.659723162651062,
"learning_rate": 0.001086206896551724,
"loss": 0.3844,
"step": 1590
},
{
"epoch": 7.87,
"grad_norm": 0.7200958132743835,
"learning_rate": 0.001061576354679803,
"loss": 0.3627,
"step": 1600
},
{
"epoch": 7.92,
"grad_norm": 1.055242657661438,
"learning_rate": 0.0010369458128078818,
"loss": 0.3522,
"step": 1610
},
{
"epoch": 7.97,
"grad_norm": 1.0147466659545898,
"learning_rate": 0.0010123152709359607,
"loss": 0.3942,
"step": 1620
},
{
"epoch": 8.0,
"eval_accuracy": 0.9832775919732442,
"eval_f1": 0.9816549969154011,
"eval_loss": 0.07823298126459122,
"eval_precision": 0.9812899149698605,
"eval_recall": 0.9827399465830431,
"eval_runtime": 12.6409,
"eval_samples_per_second": 189.227,
"eval_steps_per_second": 11.866,
"step": 1626
},
{
"epoch": 8.02,
"grad_norm": 1.2122970819473267,
"learning_rate": 0.0009876847290640393,
"loss": 0.2658,
"step": 1630
},
{
"epoch": 8.07,
"grad_norm": 0.6683902740478516,
"learning_rate": 0.0009630541871921182,
"loss": 0.3499,
"step": 1640
},
{
"epoch": 8.12,
"grad_norm": 1.0198993682861328,
"learning_rate": 0.0009384236453201971,
"loss": 0.4031,
"step": 1650
},
{
"epoch": 8.17,
"grad_norm": 1.2388522624969482,
"learning_rate": 0.0009137931034482759,
"loss": 0.3164,
"step": 1660
},
{
"epoch": 8.22,
"grad_norm": 1.2226431369781494,
"learning_rate": 0.0008891625615763547,
"loss": 0.299,
"step": 1670
},
{
"epoch": 8.27,
"grad_norm": 1.0016721487045288,
"learning_rate": 0.0008645320197044335,
"loss": 0.3315,
"step": 1680
},
{
"epoch": 8.31,
"grad_norm": 1.0766950845718384,
"learning_rate": 0.0008399014778325123,
"loss": 0.286,
"step": 1690
},
{
"epoch": 8.36,
"grad_norm": 1.8925853967666626,
"learning_rate": 0.0008152709359605911,
"loss": 0.3745,
"step": 1700
},
{
"epoch": 8.41,
"grad_norm": 0.8409897685050964,
"learning_rate": 0.00079064039408867,
"loss": 0.3276,
"step": 1710
},
{
"epoch": 8.46,
"grad_norm": 1.1315199136734009,
"learning_rate": 0.0007660098522167489,
"loss": 0.3275,
"step": 1720
},
{
"epoch": 8.51,
"grad_norm": 1.05160391330719,
"learning_rate": 0.0007413793103448275,
"loss": 0.3072,
"step": 1730
},
{
"epoch": 8.56,
"grad_norm": 1.0058565139770508,
"learning_rate": 0.0007167487684729064,
"loss": 0.3413,
"step": 1740
},
{
"epoch": 8.61,
"grad_norm": 0.9650315046310425,
"learning_rate": 0.0006921182266009853,
"loss": 0.365,
"step": 1750
},
{
"epoch": 8.66,
"grad_norm": 0.6396649479866028,
"learning_rate": 0.0006674876847290641,
"loss": 0.3271,
"step": 1760
},
{
"epoch": 8.71,
"grad_norm": 0.7196962833404541,
"learning_rate": 0.0006428571428571428,
"loss": 0.3683,
"step": 1770
},
{
"epoch": 8.76,
"grad_norm": 0.8004360198974609,
"learning_rate": 0.0006182266009852217,
"loss": 0.3687,
"step": 1780
},
{
"epoch": 8.81,
"grad_norm": 0.9620378613471985,
"learning_rate": 0.0005935960591133005,
"loss": 0.3298,
"step": 1790
},
{
"epoch": 8.86,
"grad_norm": 0.7279284596443176,
"learning_rate": 0.0005689655172413793,
"loss": 0.3109,
"step": 1800
},
{
"epoch": 8.91,
"grad_norm": 1.2889859676361084,
"learning_rate": 0.0005443349753694581,
"loss": 0.4205,
"step": 1810
},
{
"epoch": 8.95,
"grad_norm": 0.8951707482337952,
"learning_rate": 0.000519704433497537,
"loss": 0.2971,
"step": 1820
},
{
"epoch": 9.0,
"eval_accuracy": 0.9862040133779264,
"eval_f1": 0.9872927641297526,
"eval_loss": 0.08386523276567459,
"eval_precision": 0.9884096313888006,
"eval_recall": 0.9865651085702777,
"eval_runtime": 12.5898,
"eval_samples_per_second": 189.995,
"eval_steps_per_second": 11.914,
"step": 1829
},
{
"epoch": 9.0,
"grad_norm": 0.6368119120597839,
"learning_rate": 0.0004950738916256157,
"loss": 0.3692,
"step": 1830
},
{
"epoch": 9.05,
"grad_norm": 0.5298008322715759,
"learning_rate": 0.0004704433497536946,
"loss": 0.3112,
"step": 1840
},
{
"epoch": 9.1,
"grad_norm": 0.5324183702468872,
"learning_rate": 0.0004458128078817734,
"loss": 0.3389,
"step": 1850
},
{
"epoch": 9.15,
"grad_norm": 0.6283653974533081,
"learning_rate": 0.0004211822660098522,
"loss": 0.3328,
"step": 1860
},
{
"epoch": 9.2,
"grad_norm": 0.9130664467811584,
"learning_rate": 0.0003965517241379311,
"loss": 0.2774,
"step": 1870
},
{
"epoch": 9.25,
"grad_norm": 0.569354772567749,
"learning_rate": 0.00037192118226600984,
"loss": 0.3055,
"step": 1880
},
{
"epoch": 9.3,
"grad_norm": 0.91834557056427,
"learning_rate": 0.0003472906403940887,
"loss": 0.3108,
"step": 1890
},
{
"epoch": 9.35,
"grad_norm": 1.2413830757141113,
"learning_rate": 0.0003226600985221675,
"loss": 0.2896,
"step": 1900
},
{
"epoch": 9.4,
"grad_norm": 0.7163951396942139,
"learning_rate": 0.0002980295566502463,
"loss": 0.2883,
"step": 1910
},
{
"epoch": 9.45,
"grad_norm": 1.121543288230896,
"learning_rate": 0.0002733990147783251,
"loss": 0.3061,
"step": 1920
},
{
"epoch": 9.5,
"grad_norm": 0.8933872580528259,
"learning_rate": 0.00024876847290640394,
"loss": 0.3087,
"step": 1930
},
{
"epoch": 9.54,
"grad_norm": 0.7040281295776367,
"learning_rate": 0.00022413793103448276,
"loss": 0.2723,
"step": 1940
},
{
"epoch": 9.59,
"grad_norm": 0.3641883432865143,
"learning_rate": 0.00019950738916256158,
"loss": 0.2397,
"step": 1950
},
{
"epoch": 9.64,
"grad_norm": 1.092432975769043,
"learning_rate": 0.0001748768472906404,
"loss": 0.2763,
"step": 1960
},
{
"epoch": 9.69,
"grad_norm": 0.6717754602432251,
"learning_rate": 0.00015024630541871922,
"loss": 0.2921,
"step": 1970
},
{
"epoch": 9.74,
"grad_norm": 0.8007289171218872,
"learning_rate": 0.00012561576354679804,
"loss": 0.3365,
"step": 1980
},
{
"epoch": 9.79,
"grad_norm": 0.9652357697486877,
"learning_rate": 0.00010098522167487686,
"loss": 0.2998,
"step": 1990
},
{
"epoch": 9.84,
"grad_norm": 0.6942909955978394,
"learning_rate": 7.635467980295568e-05,
"loss": 0.2917,
"step": 2000
},
{
"epoch": 9.89,
"grad_norm": 0.5785544514656067,
"learning_rate": 5.172413793103448e-05,
"loss": 0.2662,
"step": 2010
},
{
"epoch": 9.94,
"grad_norm": 0.7549653649330139,
"learning_rate": 2.70935960591133e-05,
"loss": 0.2989,
"step": 2020
},
{
"epoch": 9.99,
"grad_norm": 1.3010107278823853,
"learning_rate": 2.463054187192118e-06,
"loss": 0.3242,
"step": 2030
},
{
"epoch": 9.99,
"eval_accuracy": 0.9870401337792643,
"eval_f1": 0.9868360981525407,
"eval_loss": 0.07449871301651001,
"eval_precision": 0.9876698762890729,
"eval_recall": 0.9863397416476135,
"eval_runtime": 12.5861,
"eval_samples_per_second": 190.05,
"eval_steps_per_second": 11.918,
"step": 2030
},
{
"epoch": 9.99,
"step": 2030,
"total_flos": 1.0133154899356189e+19,
"train_loss": 0.5248493043072705,
"train_runtime": 1518.4893,
"train_samples_per_second": 85.611,
"train_steps_per_second": 1.337
}
],
"logging_steps": 10,
"max_steps": 2030,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.0133154899356189e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
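
The JSON above is the Hugging Face `Trainer` state saved at the end of a LoRA fine-tuning run of DeiT-base on MedMNIST v2: `log_history` interleaves per-step training records (with `loss` and `learning_rate`), per-epoch evaluation records (with `eval_*` metrics), and one final run summary. Below is a minimal sketch of how this structure can be parsed and summarized; it assumes the file has been saved locally as `trainer_state.json` (a hypothetical path) and that the initial learning rate was 0.005, a value inferred from the first logged steps rather than stated in the file.

```python
import json

# Minimal sketch: load a trainer_state.json like the one above and summarize it.
# Assumes the JSON has been saved locally as "trainer_state.json" (hypothetical path).
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes record types: training logs carry "loss" and "learning_rate",
# evaluation logs carry "eval_*" metrics, and the last entry holds run totals.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_accuracy" in e]

print(f"best_metric={state['best_metric']:.4f} at {state['best_model_checkpoint']}")
for e in eval_logs:
    print(
        f"epoch {e['epoch']:>5}: "
        f"acc={e['eval_accuracy']:.4f} f1={e['eval_f1']:.4f} loss={e['eval_loss']:.4f}"
    )

# The logged learning rates approximately follow a linear decay to ~0 over
# max_steps; this reconstructs that schedule for a quick sanity check.
init_lr = 0.005  # assumption: initial LR inferred from the first logged values
max_steps = state["max_steps"]
for e in train_logs[:3]:
    expected = init_lr * (1 - e["step"] / max_steps)
    print(f"step {e['step']}: logged={e['learning_rate']:.6f} linear~{expected:.6f}")
```

Run against this state file, the loop over `eval_logs` would print one line per epoch, making it easy to see the validation accuracy climbing from roughly 0.90 after epoch 1 to about 0.987 at the final step 2030, which matches `best_metric` and `best_model_checkpoint` recorded at the top of the file.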