akkky02's picture
managed the repo
8f4e803
raw
history blame contribute delete
No virus
26.4 kB
{
"best_metric": 1.0022460222244263,
"best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/distilbert/distilroberta_base_patent/checkpoint-1150",
"epoch": 3.0,
"eval_steps": 50,
"global_step": 1173,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"grad_norm": 2.0824131965637207,
"learning_rate": 1.9829497016197786e-05,
"loss": 2.183,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 4.778645038604736,
"learning_rate": 1.9658994032395567e-05,
"loss": 1.9562,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 4.916919231414795,
"learning_rate": 1.9488491048593352e-05,
"loss": 1.7366,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 6.5065836906433105,
"learning_rate": 1.9317988064791137e-05,
"loss": 1.5817,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 3.7337374687194824,
"learning_rate": 1.9147485080988918e-05,
"loss": 1.5474,
"step": 50
},
{
"epoch": 0.13,
"eval_accuracy": 0.4644,
"eval_f1_macro": 0.3007262651897904,
"eval_f1_micro": 0.4644,
"eval_loss": 1.4682279825210571,
"eval_runtime": 2.5172,
"eval_samples_per_second": 1986.333,
"eval_steps_per_second": 31.384,
"step": 50
},
{
"epoch": 0.15,
"grad_norm": 3.306447744369507,
"learning_rate": 1.8976982097186702e-05,
"loss": 1.4614,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 4.347895622253418,
"learning_rate": 1.8806479113384487e-05,
"loss": 1.4045,
"step": 70
},
{
"epoch": 0.2,
"grad_norm": 4.505576133728027,
"learning_rate": 1.863597612958227e-05,
"loss": 1.397,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 7.081475734710693,
"learning_rate": 1.8465473145780053e-05,
"loss": 1.3458,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 4.229913711547852,
"learning_rate": 1.8294970161977838e-05,
"loss": 1.2975,
"step": 100
},
{
"epoch": 0.26,
"eval_accuracy": 0.5514,
"eval_f1_macro": 0.385696822491771,
"eval_f1_micro": 0.5514,
"eval_loss": 1.2702211141586304,
"eval_runtime": 2.4835,
"eval_samples_per_second": 2013.318,
"eval_steps_per_second": 31.81,
"step": 100
},
{
"epoch": 0.28,
"grad_norm": 6.899994373321533,
"learning_rate": 1.812446717817562e-05,
"loss": 1.3211,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 5.848340034484863,
"learning_rate": 1.7953964194373403e-05,
"loss": 1.2881,
"step": 120
},
{
"epoch": 0.33,
"grad_norm": 6.385013103485107,
"learning_rate": 1.7783461210571188e-05,
"loss": 1.205,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 6.531163692474365,
"learning_rate": 1.761295822676897e-05,
"loss": 1.2051,
"step": 140
},
{
"epoch": 0.38,
"grad_norm": 6.575864791870117,
"learning_rate": 1.7442455242966754e-05,
"loss": 1.277,
"step": 150
},
{
"epoch": 0.38,
"eval_accuracy": 0.588,
"eval_f1_macro": 0.4212858134893549,
"eval_f1_micro": 0.588,
"eval_loss": 1.1989067792892456,
"eval_runtime": 2.4878,
"eval_samples_per_second": 2009.844,
"eval_steps_per_second": 31.756,
"step": 150
},
{
"epoch": 0.41,
"grad_norm": 5.341747760772705,
"learning_rate": 1.727195225916454e-05,
"loss": 1.1411,
"step": 160
},
{
"epoch": 0.43,
"grad_norm": 6.646276473999023,
"learning_rate": 1.710144927536232e-05,
"loss": 1.2777,
"step": 170
},
{
"epoch": 0.46,
"grad_norm": 7.130493640899658,
"learning_rate": 1.6930946291560104e-05,
"loss": 1.2124,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 6.087169647216797,
"learning_rate": 1.676044330775789e-05,
"loss": 1.2863,
"step": 190
},
{
"epoch": 0.51,
"grad_norm": 5.556641101837158,
"learning_rate": 1.658994032395567e-05,
"loss": 1.1483,
"step": 200
},
{
"epoch": 0.51,
"eval_accuracy": 0.6018,
"eval_f1_macro": 0.44326147747487465,
"eval_f1_micro": 0.6018,
"eval_loss": 1.150877833366394,
"eval_runtime": 2.5501,
"eval_samples_per_second": 1960.73,
"eval_steps_per_second": 30.98,
"step": 200
},
{
"epoch": 0.54,
"grad_norm": 7.140419960021973,
"learning_rate": 1.6419437340153455e-05,
"loss": 1.1968,
"step": 210
},
{
"epoch": 0.56,
"grad_norm": 5.2833147048950195,
"learning_rate": 1.624893435635124e-05,
"loss": 1.1433,
"step": 220
},
{
"epoch": 0.59,
"grad_norm": 5.170966148376465,
"learning_rate": 1.607843137254902e-05,
"loss": 1.1196,
"step": 230
},
{
"epoch": 0.61,
"grad_norm": 8.696062088012695,
"learning_rate": 1.5907928388746805e-05,
"loss": 1.1489,
"step": 240
},
{
"epoch": 0.64,
"grad_norm": 5.4057698249816895,
"learning_rate": 1.573742540494459e-05,
"loss": 1.1909,
"step": 250
},
{
"epoch": 0.64,
"eval_accuracy": 0.618,
"eval_f1_macro": 0.4784833651456958,
"eval_f1_micro": 0.618,
"eval_loss": 1.1209474802017212,
"eval_runtime": 2.5515,
"eval_samples_per_second": 1959.62,
"eval_steps_per_second": 30.962,
"step": 250
},
{
"epoch": 0.66,
"grad_norm": 6.896720886230469,
"learning_rate": 1.556692242114237e-05,
"loss": 1.191,
"step": 260
},
{
"epoch": 0.69,
"grad_norm": 6.1380791664123535,
"learning_rate": 1.5396419437340155e-05,
"loss": 1.1447,
"step": 270
},
{
"epoch": 0.72,
"grad_norm": 6.051652908325195,
"learning_rate": 1.5225916453537938e-05,
"loss": 1.0847,
"step": 280
},
{
"epoch": 0.74,
"grad_norm": 8.247784614562988,
"learning_rate": 1.5055413469735723e-05,
"loss": 1.1495,
"step": 290
},
{
"epoch": 0.77,
"grad_norm": 5.845155715942383,
"learning_rate": 1.4884910485933506e-05,
"loss": 1.1243,
"step": 300
},
{
"epoch": 0.77,
"eval_accuracy": 0.622,
"eval_f1_macro": 0.49297464163053206,
"eval_f1_micro": 0.622,
"eval_loss": 1.1127510070800781,
"eval_runtime": 2.5574,
"eval_samples_per_second": 1955.106,
"eval_steps_per_second": 30.891,
"step": 300
},
{
"epoch": 0.79,
"grad_norm": 6.277886867523193,
"learning_rate": 1.4714407502131289e-05,
"loss": 1.1136,
"step": 310
},
{
"epoch": 0.82,
"grad_norm": 6.175780773162842,
"learning_rate": 1.4543904518329073e-05,
"loss": 1.1245,
"step": 320
},
{
"epoch": 0.84,
"grad_norm": 5.58172607421875,
"learning_rate": 1.4373401534526856e-05,
"loss": 1.0753,
"step": 330
},
{
"epoch": 0.87,
"grad_norm": 6.277215480804443,
"learning_rate": 1.420289855072464e-05,
"loss": 1.1035,
"step": 340
},
{
"epoch": 0.9,
"grad_norm": 6.460805892944336,
"learning_rate": 1.4032395566922424e-05,
"loss": 1.1353,
"step": 350
},
{
"epoch": 0.9,
"eval_accuracy": 0.609,
"eval_f1_macro": 0.49301677161817686,
"eval_f1_micro": 0.609,
"eval_loss": 1.113377571105957,
"eval_runtime": 2.5617,
"eval_samples_per_second": 1951.828,
"eval_steps_per_second": 30.839,
"step": 350
},
{
"epoch": 0.92,
"grad_norm": 5.35620641708374,
"learning_rate": 1.3861892583120207e-05,
"loss": 1.1247,
"step": 360
},
{
"epoch": 0.95,
"grad_norm": 7.259354591369629,
"learning_rate": 1.369138959931799e-05,
"loss": 1.1226,
"step": 370
},
{
"epoch": 0.97,
"grad_norm": 6.781973838806152,
"learning_rate": 1.3520886615515774e-05,
"loss": 1.035,
"step": 380
},
{
"epoch": 1.0,
"grad_norm": 5.575621604919434,
"learning_rate": 1.3350383631713557e-05,
"loss": 1.1588,
"step": 390
},
{
"epoch": 1.02,
"grad_norm": 7.666539669036865,
"learning_rate": 1.317988064791134e-05,
"loss": 1.0636,
"step": 400
},
{
"epoch": 1.02,
"eval_accuracy": 0.64,
"eval_f1_macro": 0.5189274075388721,
"eval_f1_micro": 0.64,
"eval_loss": 1.0675694942474365,
"eval_runtime": 2.5643,
"eval_samples_per_second": 1949.816,
"eval_steps_per_second": 30.807,
"step": 400
},
{
"epoch": 1.05,
"grad_norm": 4.991756916046143,
"learning_rate": 1.3009377664109125e-05,
"loss": 1.0461,
"step": 410
},
{
"epoch": 1.07,
"grad_norm": 6.300774574279785,
"learning_rate": 1.2838874680306906e-05,
"loss": 1.019,
"step": 420
},
{
"epoch": 1.1,
"grad_norm": 7.338348388671875,
"learning_rate": 1.2668371696504689e-05,
"loss": 1.0367,
"step": 430
},
{
"epoch": 1.13,
"grad_norm": 7.154069423675537,
"learning_rate": 1.2497868712702472e-05,
"loss": 1.094,
"step": 440
},
{
"epoch": 1.15,
"grad_norm": 4.677978515625,
"learning_rate": 1.2327365728900256e-05,
"loss": 0.9667,
"step": 450
},
{
"epoch": 1.15,
"eval_accuracy": 0.6404,
"eval_f1_macro": 0.5192652764751173,
"eval_f1_micro": 0.6404,
"eval_loss": 1.0703057050704956,
"eval_runtime": 2.5638,
"eval_samples_per_second": 1950.255,
"eval_steps_per_second": 30.814,
"step": 450
},
{
"epoch": 1.18,
"grad_norm": 6.083702564239502,
"learning_rate": 1.215686274509804e-05,
"loss": 1.0621,
"step": 460
},
{
"epoch": 1.2,
"grad_norm": 10.16114330291748,
"learning_rate": 1.1986359761295822e-05,
"loss": 1.0172,
"step": 470
},
{
"epoch": 1.23,
"grad_norm": 6.390872955322266,
"learning_rate": 1.1815856777493607e-05,
"loss": 0.972,
"step": 480
},
{
"epoch": 1.25,
"grad_norm": 7.280271530151367,
"learning_rate": 1.164535379369139e-05,
"loss": 0.9825,
"step": 490
},
{
"epoch": 1.28,
"grad_norm": 7.670015335083008,
"learning_rate": 1.1474850809889173e-05,
"loss": 1.0063,
"step": 500
},
{
"epoch": 1.28,
"eval_accuracy": 0.6386,
"eval_f1_macro": 0.5127563688009887,
"eval_f1_micro": 0.6386,
"eval_loss": 1.049485206604004,
"eval_runtime": 2.5639,
"eval_samples_per_second": 1950.156,
"eval_steps_per_second": 30.812,
"step": 500
},
{
"epoch": 1.3,
"grad_norm": 5.522829532623291,
"learning_rate": 1.1304347826086957e-05,
"loss": 1.023,
"step": 510
},
{
"epoch": 1.33,
"grad_norm": 9.047003746032715,
"learning_rate": 1.113384484228474e-05,
"loss": 1.0607,
"step": 520
},
{
"epoch": 1.36,
"grad_norm": 10.908841133117676,
"learning_rate": 1.0963341858482523e-05,
"loss": 1.0389,
"step": 530
},
{
"epoch": 1.38,
"grad_norm": 8.025410652160645,
"learning_rate": 1.0792838874680308e-05,
"loss": 1.0097,
"step": 540
},
{
"epoch": 1.41,
"grad_norm": 6.375541687011719,
"learning_rate": 1.062233589087809e-05,
"loss": 0.9521,
"step": 550
},
{
"epoch": 1.41,
"eval_accuracy": 0.6432,
"eval_f1_macro": 0.5185062629959201,
"eval_f1_micro": 0.6432,
"eval_loss": 1.04691743850708,
"eval_runtime": 2.5619,
"eval_samples_per_second": 1951.646,
"eval_steps_per_second": 30.836,
"step": 550
},
{
"epoch": 1.43,
"grad_norm": 8.49925708770752,
"learning_rate": 1.0451832907075873e-05,
"loss": 1.0224,
"step": 560
},
{
"epoch": 1.46,
"grad_norm": 9.955578804016113,
"learning_rate": 1.0281329923273658e-05,
"loss": 1.0192,
"step": 570
},
{
"epoch": 1.48,
"grad_norm": 6.0503363609313965,
"learning_rate": 1.0110826939471441e-05,
"loss": 0.9438,
"step": 580
},
{
"epoch": 1.51,
"grad_norm": 7.6630048751831055,
"learning_rate": 9.940323955669226e-06,
"loss": 0.8864,
"step": 590
},
{
"epoch": 1.53,
"grad_norm": 6.8778815269470215,
"learning_rate": 9.769820971867009e-06,
"loss": 0.998,
"step": 600
},
{
"epoch": 1.53,
"eval_accuracy": 0.6486,
"eval_f1_macro": 0.5356688160968893,
"eval_f1_micro": 0.6486,
"eval_loss": 1.0358997583389282,
"eval_runtime": 2.5668,
"eval_samples_per_second": 1947.936,
"eval_steps_per_second": 30.777,
"step": 600
},
{
"epoch": 1.56,
"grad_norm": 7.481098651885986,
"learning_rate": 9.599317988064793e-06,
"loss": 1.0152,
"step": 610
},
{
"epoch": 1.59,
"grad_norm": 7.424659252166748,
"learning_rate": 9.428815004262576e-06,
"loss": 1.01,
"step": 620
},
{
"epoch": 1.61,
"grad_norm": 9.956852912902832,
"learning_rate": 9.258312020460359e-06,
"loss": 1.0664,
"step": 630
},
{
"epoch": 1.64,
"grad_norm": 7.87490177154541,
"learning_rate": 9.087809036658142e-06,
"loss": 0.924,
"step": 640
},
{
"epoch": 1.66,
"grad_norm": 8.608465194702148,
"learning_rate": 8.917306052855925e-06,
"loss": 1.0188,
"step": 650
},
{
"epoch": 1.66,
"eval_accuracy": 0.6418,
"eval_f1_macro": 0.5394812129950733,
"eval_f1_micro": 0.6418,
"eval_loss": 1.0529745817184448,
"eval_runtime": 2.5692,
"eval_samples_per_second": 1946.122,
"eval_steps_per_second": 30.749,
"step": 650
},
{
"epoch": 1.69,
"grad_norm": 7.214836597442627,
"learning_rate": 8.74680306905371e-06,
"loss": 0.9906,
"step": 660
},
{
"epoch": 1.71,
"grad_norm": 4.831575870513916,
"learning_rate": 8.576300085251492e-06,
"loss": 1.052,
"step": 670
},
{
"epoch": 1.74,
"grad_norm": 7.768477916717529,
"learning_rate": 8.405797101449275e-06,
"loss": 1.0451,
"step": 680
},
{
"epoch": 1.76,
"grad_norm": 7.0132527351379395,
"learning_rate": 8.23529411764706e-06,
"loss": 0.9187,
"step": 690
},
{
"epoch": 1.79,
"grad_norm": 12.03316879272461,
"learning_rate": 8.064791133844843e-06,
"loss": 0.9617,
"step": 700
},
{
"epoch": 1.79,
"eval_accuracy": 0.6526,
"eval_f1_macro": 0.5307017878131918,
"eval_f1_micro": 0.6526,
"eval_loss": 1.0214056968688965,
"eval_runtime": 2.5148,
"eval_samples_per_second": 1988.225,
"eval_steps_per_second": 31.414,
"step": 700
},
{
"epoch": 1.82,
"grad_norm": 7.816739559173584,
"learning_rate": 7.894288150042626e-06,
"loss": 1.008,
"step": 710
},
{
"epoch": 1.84,
"grad_norm": 9.684741020202637,
"learning_rate": 7.72378516624041e-06,
"loss": 1.0609,
"step": 720
},
{
"epoch": 1.87,
"grad_norm": 6.8859453201293945,
"learning_rate": 7.553282182438193e-06,
"loss": 1.0002,
"step": 730
},
{
"epoch": 1.89,
"grad_norm": 6.9528350830078125,
"learning_rate": 7.382779198635977e-06,
"loss": 1.0341,
"step": 740
},
{
"epoch": 1.92,
"grad_norm": 6.688358783721924,
"learning_rate": 7.21227621483376e-06,
"loss": 1.0234,
"step": 750
},
{
"epoch": 1.92,
"eval_accuracy": 0.6514,
"eval_f1_macro": 0.5495228476194496,
"eval_f1_micro": 0.6514,
"eval_loss": 1.0148464441299438,
"eval_runtime": 2.5658,
"eval_samples_per_second": 1948.681,
"eval_steps_per_second": 30.789,
"step": 750
},
{
"epoch": 1.94,
"grad_norm": 7.913070201873779,
"learning_rate": 7.0417732310315436e-06,
"loss": 1.0278,
"step": 760
},
{
"epoch": 1.97,
"grad_norm": 7.68366813659668,
"learning_rate": 6.8712702472293265e-06,
"loss": 0.9754,
"step": 770
},
{
"epoch": 1.99,
"grad_norm": 7.161551475524902,
"learning_rate": 6.70076726342711e-06,
"loss": 1.0346,
"step": 780
},
{
"epoch": 2.02,
"grad_norm": 7.975135803222656,
"learning_rate": 6.530264279624894e-06,
"loss": 0.9265,
"step": 790
},
{
"epoch": 2.05,
"grad_norm": 6.6218791007995605,
"learning_rate": 6.359761295822677e-06,
"loss": 0.8914,
"step": 800
},
{
"epoch": 2.05,
"eval_accuracy": 0.6544,
"eval_f1_macro": 0.5603159525538163,
"eval_f1_micro": 0.6544,
"eval_loss": 1.0131620168685913,
"eval_runtime": 2.5635,
"eval_samples_per_second": 1950.467,
"eval_steps_per_second": 30.817,
"step": 800
},
{
"epoch": 2.07,
"grad_norm": 11.909503936767578,
"learning_rate": 6.189258312020461e-06,
"loss": 0.9247,
"step": 810
},
{
"epoch": 2.1,
"grad_norm": 6.921329975128174,
"learning_rate": 6.018755328218244e-06,
"loss": 0.8752,
"step": 820
},
{
"epoch": 2.12,
"grad_norm": 8.646865844726562,
"learning_rate": 5.848252344416027e-06,
"loss": 0.9489,
"step": 830
},
{
"epoch": 2.15,
"grad_norm": 6.761762619018555,
"learning_rate": 5.677749360613811e-06,
"loss": 0.9303,
"step": 840
},
{
"epoch": 2.17,
"grad_norm": 6.754298210144043,
"learning_rate": 5.507246376811595e-06,
"loss": 0.9269,
"step": 850
},
{
"epoch": 2.17,
"eval_accuracy": 0.6562,
"eval_f1_macro": 0.5646631294942075,
"eval_f1_micro": 0.6562,
"eval_loss": 1.0109927654266357,
"eval_runtime": 2.5641,
"eval_samples_per_second": 1949.972,
"eval_steps_per_second": 30.81,
"step": 850
},
{
"epoch": 2.2,
"grad_norm": 7.462425708770752,
"learning_rate": 5.336743393009378e-06,
"loss": 1.0024,
"step": 860
},
{
"epoch": 2.23,
"grad_norm": 10.302495002746582,
"learning_rate": 5.1662404092071615e-06,
"loss": 0.932,
"step": 870
},
{
"epoch": 2.25,
"grad_norm": 6.7270379066467285,
"learning_rate": 4.995737425404945e-06,
"loss": 1.0035,
"step": 880
},
{
"epoch": 2.28,
"grad_norm": 8.183883666992188,
"learning_rate": 4.825234441602728e-06,
"loss": 0.9479,
"step": 890
},
{
"epoch": 2.3,
"grad_norm": 6.861289024353027,
"learning_rate": 4.654731457800512e-06,
"loss": 1.0351,
"step": 900
},
{
"epoch": 2.3,
"eval_accuracy": 0.6528,
"eval_f1_macro": 0.5716748212028093,
"eval_f1_micro": 0.6528,
"eval_loss": 1.0123671293258667,
"eval_runtime": 2.568,
"eval_samples_per_second": 1947.076,
"eval_steps_per_second": 30.764,
"step": 900
},
{
"epoch": 2.33,
"grad_norm": 7.027976036071777,
"learning_rate": 4.484228473998296e-06,
"loss": 0.9684,
"step": 910
},
{
"epoch": 2.35,
"grad_norm": 7.110782623291016,
"learning_rate": 4.313725490196079e-06,
"loss": 0.9008,
"step": 920
},
{
"epoch": 2.38,
"grad_norm": 8.018675804138184,
"learning_rate": 4.143222506393862e-06,
"loss": 0.9205,
"step": 930
},
{
"epoch": 2.4,
"grad_norm": 7.683128833770752,
"learning_rate": 3.972719522591646e-06,
"loss": 0.9033,
"step": 940
},
{
"epoch": 2.43,
"grad_norm": 8.171706199645996,
"learning_rate": 3.802216538789429e-06,
"loss": 0.9582,
"step": 950
},
{
"epoch": 2.43,
"eval_accuracy": 0.6524,
"eval_f1_macro": 0.555182452599764,
"eval_f1_micro": 0.6524,
"eval_loss": 1.0150039196014404,
"eval_runtime": 2.5637,
"eval_samples_per_second": 1950.307,
"eval_steps_per_second": 30.815,
"step": 950
},
{
"epoch": 2.46,
"grad_norm": 8.379620552062988,
"learning_rate": 3.6317135549872124e-06,
"loss": 0.9552,
"step": 960
},
{
"epoch": 2.48,
"grad_norm": 7.152894496917725,
"learning_rate": 3.4612105711849957e-06,
"loss": 0.9719,
"step": 970
},
{
"epoch": 2.51,
"grad_norm": 6.816888809204102,
"learning_rate": 3.2907075873827795e-06,
"loss": 0.8721,
"step": 980
},
{
"epoch": 2.53,
"grad_norm": 7.131276607513428,
"learning_rate": 3.120204603580563e-06,
"loss": 0.9568,
"step": 990
},
{
"epoch": 2.56,
"grad_norm": 9.819961547851562,
"learning_rate": 2.949701619778346e-06,
"loss": 0.8959,
"step": 1000
},
{
"epoch": 2.56,
"eval_accuracy": 0.659,
"eval_f1_macro": 0.5741291743786698,
"eval_f1_micro": 0.659,
"eval_loss": 1.0068503618240356,
"eval_runtime": 2.5602,
"eval_samples_per_second": 1953.0,
"eval_steps_per_second": 30.857,
"step": 1000
},
{
"epoch": 2.58,
"grad_norm": 6.770971298217773,
"learning_rate": 2.77919863597613e-06,
"loss": 0.9506,
"step": 1010
},
{
"epoch": 2.61,
"grad_norm": 6.461827278137207,
"learning_rate": 2.6086956521739132e-06,
"loss": 0.8873,
"step": 1020
},
{
"epoch": 2.63,
"grad_norm": 5.610044479370117,
"learning_rate": 2.4381926683716966e-06,
"loss": 0.8473,
"step": 1030
},
{
"epoch": 2.66,
"grad_norm": 7.214883804321289,
"learning_rate": 2.2676896845694803e-06,
"loss": 0.8994,
"step": 1040
},
{
"epoch": 2.69,
"grad_norm": 10.100293159484863,
"learning_rate": 2.0971867007672637e-06,
"loss": 0.8342,
"step": 1050
},
{
"epoch": 2.69,
"eval_accuracy": 0.6596,
"eval_f1_macro": 0.5793678136298913,
"eval_f1_micro": 0.6596,
"eval_loss": 1.00313401222229,
"eval_runtime": 2.5601,
"eval_samples_per_second": 1953.051,
"eval_steps_per_second": 30.858,
"step": 1050
},
{
"epoch": 2.71,
"grad_norm": 6.870211124420166,
"learning_rate": 1.926683716965047e-06,
"loss": 0.8885,
"step": 1060
},
{
"epoch": 2.74,
"grad_norm": 10.746337890625,
"learning_rate": 1.7561807331628305e-06,
"loss": 0.9337,
"step": 1070
},
{
"epoch": 2.76,
"grad_norm": 7.3471221923828125,
"learning_rate": 1.585677749360614e-06,
"loss": 0.9137,
"step": 1080
},
{
"epoch": 2.79,
"grad_norm": 5.51514196395874,
"learning_rate": 1.4151747655583974e-06,
"loss": 0.8889,
"step": 1090
},
{
"epoch": 2.81,
"grad_norm": 6.89326810836792,
"learning_rate": 1.2446717817561808e-06,
"loss": 0.883,
"step": 1100
},
{
"epoch": 2.81,
"eval_accuracy": 0.6594,
"eval_f1_macro": 0.5767016713811989,
"eval_f1_micro": 0.6594,
"eval_loss": 1.0041770935058594,
"eval_runtime": 2.5645,
"eval_samples_per_second": 1949.663,
"eval_steps_per_second": 30.805,
"step": 1100
},
{
"epoch": 2.84,
"grad_norm": 5.81862735748291,
"learning_rate": 1.0741687979539643e-06,
"loss": 0.943,
"step": 1110
},
{
"epoch": 2.86,
"grad_norm": 7.020530700683594,
"learning_rate": 9.036658141517478e-07,
"loss": 0.9289,
"step": 1120
},
{
"epoch": 2.89,
"grad_norm": 6.579043388366699,
"learning_rate": 7.331628303495311e-07,
"loss": 0.9096,
"step": 1130
},
{
"epoch": 2.92,
"grad_norm": 6.589502811431885,
"learning_rate": 5.626598465473146e-07,
"loss": 0.9547,
"step": 1140
},
{
"epoch": 2.94,
"grad_norm": 6.961599349975586,
"learning_rate": 3.921568627450981e-07,
"loss": 0.9377,
"step": 1150
},
{
"epoch": 2.94,
"eval_accuracy": 0.6596,
"eval_f1_macro": 0.5724668431238462,
"eval_f1_micro": 0.6596,
"eval_loss": 1.0022460222244263,
"eval_runtime": 2.5601,
"eval_samples_per_second": 1953.074,
"eval_steps_per_second": 30.859,
"step": 1150
},
{
"epoch": 2.97,
"grad_norm": 6.636003017425537,
"learning_rate": 2.2165387894288152e-07,
"loss": 0.8657,
"step": 1160
},
{
"epoch": 2.99,
"grad_norm": 8.342982292175293,
"learning_rate": 5.115089514066497e-08,
"loss": 0.9396,
"step": 1170
},
{
"epoch": 3.0,
"step": 1173,
"total_flos": 2486458469842944.0,
"train_loss": 1.0698413495212564,
"train_runtime": 210.2936,
"train_samples_per_second": 356.644,
"train_steps_per_second": 5.578
}
],
"logging_steps": 10,
"max_steps": 1173,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"total_flos": 2486458469842944.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}