akkky02's picture
Upload folder using huggingface_hub
c2ce683 verified
raw
history blame
No virus
26.4 kB
{
"best_metric": 0.9373559355735779,
"best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/FacebookAI/roberta_base_patent/checkpoint-1150",
"epoch": 3.0,
"eval_steps": 50,
"global_step": 1173,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"grad_norm": 3.06965708732605,
"learning_rate": 1.9829497016197786e-05,
"loss": 2.1487,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 4.080266952514648,
"learning_rate": 1.9658994032395567e-05,
"loss": 1.9493,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 6.19921875,
"learning_rate": 1.9488491048593352e-05,
"loss": 1.7343,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 7.48213529586792,
"learning_rate": 1.9317988064791137e-05,
"loss": 1.5789,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 4.417103290557861,
"learning_rate": 1.9147485080988918e-05,
"loss": 1.5014,
"step": 50
},
{
"epoch": 0.13,
"eval_accuracy": 0.5094,
"eval_f1_macro": 0.3518682769080962,
"eval_f1_micro": 0.5094,
"eval_loss": 1.424868106842041,
"eval_runtime": 4.5754,
"eval_samples_per_second": 1092.79,
"eval_steps_per_second": 17.266,
"step": 50
},
{
"epoch": 0.15,
"grad_norm": 6.253727436065674,
"learning_rate": 1.8976982097186702e-05,
"loss": 1.432,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 7.414198398590088,
"learning_rate": 1.8806479113384487e-05,
"loss": 1.4074,
"step": 70
},
{
"epoch": 0.2,
"grad_norm": 5.386297225952148,
"learning_rate": 1.863597612958227e-05,
"loss": 1.3724,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 7.517615795135498,
"learning_rate": 1.8465473145780053e-05,
"loss": 1.3282,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 5.478610515594482,
"learning_rate": 1.8294970161977838e-05,
"loss": 1.2516,
"step": 100
},
{
"epoch": 0.26,
"eval_accuracy": 0.5716,
"eval_f1_macro": 0.41097137925827465,
"eval_f1_micro": 0.5716,
"eval_loss": 1.235276222229004,
"eval_runtime": 4.6509,
"eval_samples_per_second": 1075.062,
"eval_steps_per_second": 16.986,
"step": 100
},
{
"epoch": 0.28,
"grad_norm": 9.781042098999023,
"learning_rate": 1.812446717817562e-05,
"loss": 1.284,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 8.909000396728516,
"learning_rate": 1.7953964194373403e-05,
"loss": 1.2797,
"step": 120
},
{
"epoch": 0.33,
"grad_norm": 9.288195610046387,
"learning_rate": 1.7783461210571188e-05,
"loss": 1.1562,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 9.174208641052246,
"learning_rate": 1.761295822676897e-05,
"loss": 1.1898,
"step": 140
},
{
"epoch": 0.38,
"grad_norm": 8.350323677062988,
"learning_rate": 1.7442455242966754e-05,
"loss": 1.2231,
"step": 150
},
{
"epoch": 0.38,
"eval_accuracy": 0.6184,
"eval_f1_macro": 0.47063282197492184,
"eval_f1_micro": 0.6184,
"eval_loss": 1.1279376745224,
"eval_runtime": 4.6588,
"eval_samples_per_second": 1073.247,
"eval_steps_per_second": 16.957,
"step": 150
},
{
"epoch": 0.41,
"grad_norm": 6.855159759521484,
"learning_rate": 1.727195225916454e-05,
"loss": 1.0786,
"step": 160
},
{
"epoch": 0.43,
"grad_norm": 8.450016021728516,
"learning_rate": 1.710144927536232e-05,
"loss": 1.2323,
"step": 170
},
{
"epoch": 0.46,
"grad_norm": 9.08232307434082,
"learning_rate": 1.6930946291560104e-05,
"loss": 1.1601,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 6.015944480895996,
"learning_rate": 1.676044330775789e-05,
"loss": 1.2161,
"step": 190
},
{
"epoch": 0.51,
"grad_norm": 6.851879119873047,
"learning_rate": 1.658994032395567e-05,
"loss": 1.1169,
"step": 200
},
{
"epoch": 0.51,
"eval_accuracy": 0.6346,
"eval_f1_macro": 0.501600026681922,
"eval_f1_micro": 0.6346,
"eval_loss": 1.0773311853408813,
"eval_runtime": 4.675,
"eval_samples_per_second": 1069.526,
"eval_steps_per_second": 16.899,
"step": 200
},
{
"epoch": 0.54,
"grad_norm": 9.108482360839844,
"learning_rate": 1.6419437340153455e-05,
"loss": 1.1574,
"step": 210
},
{
"epoch": 0.56,
"grad_norm": 5.855172157287598,
"learning_rate": 1.624893435635124e-05,
"loss": 1.0912,
"step": 220
},
{
"epoch": 0.59,
"grad_norm": 6.393794536590576,
"learning_rate": 1.607843137254902e-05,
"loss": 1.0653,
"step": 230
},
{
"epoch": 0.61,
"grad_norm": 11.836044311523438,
"learning_rate": 1.5907928388746805e-05,
"loss": 1.0897,
"step": 240
},
{
"epoch": 0.64,
"grad_norm": 6.626937389373779,
"learning_rate": 1.573742540494459e-05,
"loss": 1.1195,
"step": 250
},
{
"epoch": 0.64,
"eval_accuracy": 0.6398,
"eval_f1_macro": 0.5181922250775877,
"eval_f1_micro": 0.6398,
"eval_loss": 1.068590521812439,
"eval_runtime": 4.683,
"eval_samples_per_second": 1067.686,
"eval_steps_per_second": 16.869,
"step": 250
},
{
"epoch": 0.66,
"grad_norm": 9.058147430419922,
"learning_rate": 1.556692242114237e-05,
"loss": 1.1408,
"step": 260
},
{
"epoch": 0.69,
"grad_norm": 7.55896520614624,
"learning_rate": 1.5396419437340155e-05,
"loss": 1.1246,
"step": 270
},
{
"epoch": 0.72,
"grad_norm": 7.467323303222656,
"learning_rate": 1.5225916453537938e-05,
"loss": 1.0448,
"step": 280
},
{
"epoch": 0.74,
"grad_norm": 8.626157760620117,
"learning_rate": 1.5055413469735723e-05,
"loss": 1.0834,
"step": 290
},
{
"epoch": 0.77,
"grad_norm": 7.654249668121338,
"learning_rate": 1.4884910485933506e-05,
"loss": 1.0737,
"step": 300
},
{
"epoch": 0.77,
"eval_accuracy": 0.6426,
"eval_f1_macro": 0.5232106803544677,
"eval_f1_micro": 0.6426,
"eval_loss": 1.0547648668289185,
"eval_runtime": 4.6943,
"eval_samples_per_second": 1065.121,
"eval_steps_per_second": 16.829,
"step": 300
},
{
"epoch": 0.79,
"grad_norm": 8.641997337341309,
"learning_rate": 1.4714407502131289e-05,
"loss": 1.0827,
"step": 310
},
{
"epoch": 0.82,
"grad_norm": 7.739094257354736,
"learning_rate": 1.4543904518329073e-05,
"loss": 1.0868,
"step": 320
},
{
"epoch": 0.84,
"grad_norm": 6.531001091003418,
"learning_rate": 1.4373401534526856e-05,
"loss": 1.038,
"step": 330
},
{
"epoch": 0.87,
"grad_norm": 5.194310188293457,
"learning_rate": 1.420289855072464e-05,
"loss": 1.064,
"step": 340
},
{
"epoch": 0.9,
"grad_norm": 5.974400997161865,
"learning_rate": 1.4032395566922424e-05,
"loss": 1.0981,
"step": 350
},
{
"epoch": 0.9,
"eval_accuracy": 0.6376,
"eval_f1_macro": 0.5604579153144441,
"eval_f1_micro": 0.6376,
"eval_loss": 1.043789267539978,
"eval_runtime": 4.6993,
"eval_samples_per_second": 1063.984,
"eval_steps_per_second": 16.811,
"step": 350
},
{
"epoch": 0.92,
"grad_norm": 8.46001148223877,
"learning_rate": 1.3861892583120207e-05,
"loss": 1.102,
"step": 360
},
{
"epoch": 0.95,
"grad_norm": 8.373648643493652,
"learning_rate": 1.369138959931799e-05,
"loss": 1.0584,
"step": 370
},
{
"epoch": 0.97,
"grad_norm": 7.385919094085693,
"learning_rate": 1.3520886615515774e-05,
"loss": 1.0176,
"step": 380
},
{
"epoch": 1.0,
"grad_norm": 7.325735092163086,
"learning_rate": 1.3350383631713557e-05,
"loss": 1.092,
"step": 390
},
{
"epoch": 1.02,
"grad_norm": 7.1516008377075195,
"learning_rate": 1.317988064791134e-05,
"loss": 1.0147,
"step": 400
},
{
"epoch": 1.02,
"eval_accuracy": 0.6606,
"eval_f1_macro": 0.5852129283356313,
"eval_f1_micro": 0.6606,
"eval_loss": 0.9970415830612183,
"eval_runtime": 4.6968,
"eval_samples_per_second": 1064.548,
"eval_steps_per_second": 16.82,
"step": 400
},
{
"epoch": 1.05,
"grad_norm": 6.275845050811768,
"learning_rate": 1.3009377664109125e-05,
"loss": 0.9861,
"step": 410
},
{
"epoch": 1.07,
"grad_norm": 8.04443645477295,
"learning_rate": 1.2838874680306906e-05,
"loss": 0.9872,
"step": 420
},
{
"epoch": 1.1,
"grad_norm": 8.21890926361084,
"learning_rate": 1.2668371696504689e-05,
"loss": 0.9606,
"step": 430
},
{
"epoch": 1.13,
"grad_norm": 7.705150604248047,
"learning_rate": 1.2497868712702472e-05,
"loss": 1.0163,
"step": 440
},
{
"epoch": 1.15,
"grad_norm": 6.80154275894165,
"learning_rate": 1.2327365728900256e-05,
"loss": 0.9049,
"step": 450
},
{
"epoch": 1.15,
"eval_accuracy": 0.6572,
"eval_f1_macro": 0.5804469606073931,
"eval_f1_micro": 0.6572,
"eval_loss": 1.0097591876983643,
"eval_runtime": 4.6955,
"eval_samples_per_second": 1064.848,
"eval_steps_per_second": 16.825,
"step": 450
},
{
"epoch": 1.18,
"grad_norm": 8.022605895996094,
"learning_rate": 1.215686274509804e-05,
"loss": 0.99,
"step": 460
},
{
"epoch": 1.2,
"grad_norm": 11.028081893920898,
"learning_rate": 1.1986359761295822e-05,
"loss": 0.9621,
"step": 470
},
{
"epoch": 1.23,
"grad_norm": 9.201847076416016,
"learning_rate": 1.1815856777493607e-05,
"loss": 0.9033,
"step": 480
},
{
"epoch": 1.25,
"grad_norm": 9.63035774230957,
"learning_rate": 1.164535379369139e-05,
"loss": 0.9062,
"step": 490
},
{
"epoch": 1.28,
"grad_norm": 9.333333969116211,
"learning_rate": 1.1474850809889173e-05,
"loss": 0.945,
"step": 500
},
{
"epoch": 1.28,
"eval_accuracy": 0.662,
"eval_f1_macro": 0.5872747578923891,
"eval_f1_micro": 0.662,
"eval_loss": 0.9906530380249023,
"eval_runtime": 4.6972,
"eval_samples_per_second": 1064.474,
"eval_steps_per_second": 16.819,
"step": 500
},
{
"epoch": 1.3,
"grad_norm": 6.602180004119873,
"learning_rate": 1.1304347826086957e-05,
"loss": 0.9584,
"step": 510
},
{
"epoch": 1.33,
"grad_norm": 10.108154296875,
"learning_rate": 1.113384484228474e-05,
"loss": 0.9882,
"step": 520
},
{
"epoch": 1.36,
"grad_norm": 9.116514205932617,
"learning_rate": 1.0963341858482523e-05,
"loss": 0.9376,
"step": 530
},
{
"epoch": 1.38,
"grad_norm": 8.690980911254883,
"learning_rate": 1.0792838874680308e-05,
"loss": 0.944,
"step": 540
},
{
"epoch": 1.41,
"grad_norm": 7.5809831619262695,
"learning_rate": 1.062233589087809e-05,
"loss": 0.9206,
"step": 550
},
{
"epoch": 1.41,
"eval_accuracy": 0.6636,
"eval_f1_macro": 0.5777404076748278,
"eval_f1_micro": 0.6636,
"eval_loss": 0.9864922761917114,
"eval_runtime": 4.6971,
"eval_samples_per_second": 1064.478,
"eval_steps_per_second": 16.819,
"step": 550
},
{
"epoch": 1.43,
"grad_norm": 9.885359764099121,
"learning_rate": 1.0451832907075873e-05,
"loss": 0.9316,
"step": 560
},
{
"epoch": 1.46,
"grad_norm": 9.082917213439941,
"learning_rate": 1.0281329923273658e-05,
"loss": 0.9554,
"step": 570
},
{
"epoch": 1.48,
"grad_norm": 8.62850284576416,
"learning_rate": 1.0110826939471441e-05,
"loss": 0.8987,
"step": 580
},
{
"epoch": 1.51,
"grad_norm": 9.194178581237793,
"learning_rate": 9.940323955669226e-06,
"loss": 0.8146,
"step": 590
},
{
"epoch": 1.53,
"grad_norm": 8.861343383789062,
"learning_rate": 9.769820971867009e-06,
"loss": 0.9263,
"step": 600
},
{
"epoch": 1.53,
"eval_accuracy": 0.6664,
"eval_f1_macro": 0.5968165647393973,
"eval_f1_micro": 0.6664,
"eval_loss": 0.9686366319656372,
"eval_runtime": 4.6944,
"eval_samples_per_second": 1065.103,
"eval_steps_per_second": 16.829,
"step": 600
},
{
"epoch": 1.56,
"grad_norm": 8.089481353759766,
"learning_rate": 9.599317988064793e-06,
"loss": 0.9515,
"step": 610
},
{
"epoch": 1.59,
"grad_norm": 8.640591621398926,
"learning_rate": 9.428815004262576e-06,
"loss": 0.9406,
"step": 620
},
{
"epoch": 1.61,
"grad_norm": 12.582009315490723,
"learning_rate": 9.258312020460359e-06,
"loss": 1.0249,
"step": 630
},
{
"epoch": 1.64,
"grad_norm": 10.242859840393066,
"learning_rate": 9.087809036658142e-06,
"loss": 0.838,
"step": 640
},
{
"epoch": 1.66,
"grad_norm": 9.301600456237793,
"learning_rate": 8.917306052855925e-06,
"loss": 0.9629,
"step": 650
},
{
"epoch": 1.66,
"eval_accuracy": 0.666,
"eval_f1_macro": 0.5941368953862343,
"eval_f1_micro": 0.666,
"eval_loss": 0.9790616035461426,
"eval_runtime": 4.693,
"eval_samples_per_second": 1065.419,
"eval_steps_per_second": 16.834,
"step": 650
},
{
"epoch": 1.69,
"grad_norm": 7.933264255523682,
"learning_rate": 8.74680306905371e-06,
"loss": 0.9275,
"step": 660
},
{
"epoch": 1.71,
"grad_norm": 6.248515605926514,
"learning_rate": 8.576300085251492e-06,
"loss": 0.9821,
"step": 670
},
{
"epoch": 1.74,
"grad_norm": 9.65717887878418,
"learning_rate": 8.405797101449275e-06,
"loss": 0.9896,
"step": 680
},
{
"epoch": 1.76,
"grad_norm": 8.450819969177246,
"learning_rate": 8.23529411764706e-06,
"loss": 0.8661,
"step": 690
},
{
"epoch": 1.79,
"grad_norm": 10.561975479125977,
"learning_rate": 8.064791133844843e-06,
"loss": 0.8913,
"step": 700
},
{
"epoch": 1.79,
"eval_accuracy": 0.6746,
"eval_f1_macro": 0.600228948901954,
"eval_f1_micro": 0.6746,
"eval_loss": 0.9578554630279541,
"eval_runtime": 4.6945,
"eval_samples_per_second": 1065.084,
"eval_steps_per_second": 16.828,
"step": 700
},
{
"epoch": 1.82,
"grad_norm": 8.024334907531738,
"learning_rate": 7.894288150042626e-06,
"loss": 0.9376,
"step": 710
},
{
"epoch": 1.84,
"grad_norm": 8.777924537658691,
"learning_rate": 7.72378516624041e-06,
"loss": 0.995,
"step": 720
},
{
"epoch": 1.87,
"grad_norm": 8.383976936340332,
"learning_rate": 7.553282182438193e-06,
"loss": 0.9258,
"step": 730
},
{
"epoch": 1.89,
"grad_norm": 7.384089469909668,
"learning_rate": 7.382779198635977e-06,
"loss": 0.982,
"step": 740
},
{
"epoch": 1.92,
"grad_norm": 7.771151542663574,
"learning_rate": 7.21227621483376e-06,
"loss": 0.96,
"step": 750
},
{
"epoch": 1.92,
"eval_accuracy": 0.6696,
"eval_f1_macro": 0.6024537798111538,
"eval_f1_micro": 0.6696,
"eval_loss": 0.9523646235466003,
"eval_runtime": 4.6912,
"eval_samples_per_second": 1065.829,
"eval_steps_per_second": 16.84,
"step": 750
},
{
"epoch": 1.94,
"grad_norm": 9.032242774963379,
"learning_rate": 7.0417732310315436e-06,
"loss": 0.972,
"step": 760
},
{
"epoch": 1.97,
"grad_norm": 8.851130485534668,
"learning_rate": 6.8712702472293265e-06,
"loss": 0.9154,
"step": 770
},
{
"epoch": 1.99,
"grad_norm": 8.968002319335938,
"learning_rate": 6.70076726342711e-06,
"loss": 0.96,
"step": 780
},
{
"epoch": 2.02,
"grad_norm": 8.527153015136719,
"learning_rate": 6.530264279624894e-06,
"loss": 0.8241,
"step": 790
},
{
"epoch": 2.05,
"grad_norm": 7.565133571624756,
"learning_rate": 6.359761295822677e-06,
"loss": 0.8284,
"step": 800
},
{
"epoch": 2.05,
"eval_accuracy": 0.6738,
"eval_f1_macro": 0.607303709282453,
"eval_f1_micro": 0.6738,
"eval_loss": 0.9539583921432495,
"eval_runtime": 4.6905,
"eval_samples_per_second": 1065.982,
"eval_steps_per_second": 16.843,
"step": 800
},
{
"epoch": 2.07,
"grad_norm": 13.65481948852539,
"learning_rate": 6.189258312020461e-06,
"loss": 0.8675,
"step": 810
},
{
"epoch": 2.1,
"grad_norm": 6.871121883392334,
"learning_rate": 6.018755328218244e-06,
"loss": 0.7866,
"step": 820
},
{
"epoch": 2.12,
"grad_norm": 11.004075050354004,
"learning_rate": 5.848252344416027e-06,
"loss": 0.8634,
"step": 830
},
{
"epoch": 2.15,
"grad_norm": 6.931361198425293,
"learning_rate": 5.677749360613811e-06,
"loss": 0.8855,
"step": 840
},
{
"epoch": 2.17,
"grad_norm": 8.874567985534668,
"learning_rate": 5.507246376811595e-06,
"loss": 0.8558,
"step": 850
},
{
"epoch": 2.17,
"eval_accuracy": 0.6742,
"eval_f1_macro": 0.594933677189279,
"eval_f1_micro": 0.6742,
"eval_loss": 0.9495751857757568,
"eval_runtime": 4.6887,
"eval_samples_per_second": 1066.398,
"eval_steps_per_second": 16.849,
"step": 850
},
{
"epoch": 2.2,
"grad_norm": 9.37259292602539,
"learning_rate": 5.336743393009378e-06,
"loss": 0.8929,
"step": 860
},
{
"epoch": 2.23,
"grad_norm": 10.430830955505371,
"learning_rate": 5.1662404092071615e-06,
"loss": 0.8427,
"step": 870
},
{
"epoch": 2.25,
"grad_norm": 6.501585960388184,
"learning_rate": 4.995737425404945e-06,
"loss": 0.9074,
"step": 880
},
{
"epoch": 2.28,
"grad_norm": 9.332304000854492,
"learning_rate": 4.825234441602728e-06,
"loss": 0.8769,
"step": 890
},
{
"epoch": 2.3,
"grad_norm": 8.06771183013916,
"learning_rate": 4.654731457800512e-06,
"loss": 0.9643,
"step": 900
},
{
"epoch": 2.3,
"eval_accuracy": 0.6748,
"eval_f1_macro": 0.6074089654591388,
"eval_f1_micro": 0.6748,
"eval_loss": 0.9519968628883362,
"eval_runtime": 4.6886,
"eval_samples_per_second": 1066.415,
"eval_steps_per_second": 16.849,
"step": 900
},
{
"epoch": 2.33,
"grad_norm": 9.928189277648926,
"learning_rate": 4.484228473998296e-06,
"loss": 0.8591,
"step": 910
},
{
"epoch": 2.35,
"grad_norm": 7.7598772048950195,
"learning_rate": 4.313725490196079e-06,
"loss": 0.8091,
"step": 920
},
{
"epoch": 2.38,
"grad_norm": 9.674939155578613,
"learning_rate": 4.143222506393862e-06,
"loss": 0.7889,
"step": 930
},
{
"epoch": 2.4,
"grad_norm": 10.04906177520752,
"learning_rate": 3.972719522591646e-06,
"loss": 0.8251,
"step": 940
},
{
"epoch": 2.43,
"grad_norm": 10.514945983886719,
"learning_rate": 3.802216538789429e-06,
"loss": 0.873,
"step": 950
},
{
"epoch": 2.43,
"eval_accuracy": 0.6732,
"eval_f1_macro": 0.596296648251256,
"eval_f1_micro": 0.6732,
"eval_loss": 0.9520734548568726,
"eval_runtime": 4.6894,
"eval_samples_per_second": 1066.228,
"eval_steps_per_second": 16.846,
"step": 950
},
{
"epoch": 2.46,
"grad_norm": 10.718672752380371,
"learning_rate": 3.6317135549872124e-06,
"loss": 0.8836,
"step": 960
},
{
"epoch": 2.48,
"grad_norm": 10.7012357711792,
"learning_rate": 3.4612105711849957e-06,
"loss": 0.8958,
"step": 970
},
{
"epoch": 2.51,
"grad_norm": 8.067261695861816,
"learning_rate": 3.2907075873827795e-06,
"loss": 0.784,
"step": 980
},
{
"epoch": 2.53,
"grad_norm": 8.40015697479248,
"learning_rate": 3.120204603580563e-06,
"loss": 0.8807,
"step": 990
},
{
"epoch": 2.56,
"grad_norm": 9.301580429077148,
"learning_rate": 2.949701619778346e-06,
"loss": 0.8271,
"step": 1000
},
{
"epoch": 2.56,
"eval_accuracy": 0.6782,
"eval_f1_macro": 0.6122813087810713,
"eval_f1_micro": 0.6782,
"eval_loss": 0.9399372935295105,
"eval_runtime": 4.6876,
"eval_samples_per_second": 1066.637,
"eval_steps_per_second": 16.853,
"step": 1000
},
{
"epoch": 2.58,
"grad_norm": 7.235937118530273,
"learning_rate": 2.77919863597613e-06,
"loss": 0.8525,
"step": 1010
},
{
"epoch": 2.61,
"grad_norm": 7.0617523193359375,
"learning_rate": 2.6086956521739132e-06,
"loss": 0.8082,
"step": 1020
},
{
"epoch": 2.63,
"grad_norm": 7.261152744293213,
"learning_rate": 2.4381926683716966e-06,
"loss": 0.7973,
"step": 1030
},
{
"epoch": 2.66,
"grad_norm": 10.145052909851074,
"learning_rate": 2.2676896845694803e-06,
"loss": 0.8335,
"step": 1040
},
{
"epoch": 2.69,
"grad_norm": 10.394144058227539,
"learning_rate": 2.0971867007672637e-06,
"loss": 0.7572,
"step": 1050
},
{
"epoch": 2.69,
"eval_accuracy": 0.6788,
"eval_f1_macro": 0.6075649716220995,
"eval_f1_micro": 0.6788,
"eval_loss": 0.940003514289856,
"eval_runtime": 4.6862,
"eval_samples_per_second": 1066.96,
"eval_steps_per_second": 16.858,
"step": 1050
},
{
"epoch": 2.71,
"grad_norm": 9.629010200500488,
"learning_rate": 1.926683716965047e-06,
"loss": 0.8284,
"step": 1060
},
{
"epoch": 2.74,
"grad_norm": 11.781661033630371,
"learning_rate": 1.7561807331628305e-06,
"loss": 0.8437,
"step": 1070
},
{
"epoch": 2.76,
"grad_norm": 9.535713195800781,
"learning_rate": 1.585677749360614e-06,
"loss": 0.8281,
"step": 1080
},
{
"epoch": 2.79,
"grad_norm": 12.147420883178711,
"learning_rate": 1.4151747655583974e-06,
"loss": 0.7891,
"step": 1090
},
{
"epoch": 2.81,
"grad_norm": 8.067042350769043,
"learning_rate": 1.2446717817561808e-06,
"loss": 0.7949,
"step": 1100
},
{
"epoch": 2.81,
"eval_accuracy": 0.6808,
"eval_f1_macro": 0.6098706235096221,
"eval_f1_micro": 0.6808,
"eval_loss": 0.9386488199234009,
"eval_runtime": 4.687,
"eval_samples_per_second": 1066.791,
"eval_steps_per_second": 16.855,
"step": 1100
},
{
"epoch": 2.84,
"grad_norm": 7.676300525665283,
"learning_rate": 1.0741687979539643e-06,
"loss": 0.8565,
"step": 1110
},
{
"epoch": 2.86,
"grad_norm": 9.350565910339355,
"learning_rate": 9.036658141517478e-07,
"loss": 0.8433,
"step": 1120
},
{
"epoch": 2.89,
"grad_norm": 7.868011474609375,
"learning_rate": 7.331628303495311e-07,
"loss": 0.8412,
"step": 1130
},
{
"epoch": 2.92,
"grad_norm": 9.809996604919434,
"learning_rate": 5.626598465473146e-07,
"loss": 0.8546,
"step": 1140
},
{
"epoch": 2.94,
"grad_norm": 10.371634483337402,
"learning_rate": 3.921568627450981e-07,
"loss": 0.8183,
"step": 1150
},
{
"epoch": 2.94,
"eval_accuracy": 0.679,
"eval_f1_macro": 0.6097649958285241,
"eval_f1_micro": 0.679,
"eval_loss": 0.9373559355735779,
"eval_runtime": 4.6836,
"eval_samples_per_second": 1067.561,
"eval_steps_per_second": 16.867,
"step": 1150
},
{
"epoch": 2.97,
"grad_norm": 8.789111137390137,
"learning_rate": 2.2165387894288152e-07,
"loss": 0.7812,
"step": 1160
},
{
"epoch": 2.99,
"grad_norm": 9.823261260986328,
"learning_rate": 5.115089514066497e-08,
"loss": 0.8309,
"step": 1170
},
{
"epoch": 3.0,
"step": 1173,
"total_flos": 4938378618339328.0,
"train_loss": 1.0076844364989037,
"train_runtime": 372.6672,
"train_samples_per_second": 201.252,
"train_steps_per_second": 3.148
}
],
"logging_steps": 10,
"max_steps": 1173,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"total_flos": 4938378618339328.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}