spanclass-qa-dice-wnut17 / trainer_state.json
jatinarora2702's picture
first model version
ffb1109
{
"best_metric": 0.8181818176808394,
"best_model_checkpoint": "../out/wnut/ner-bert-spanclass-dice/checkpoints/checkpoint-3040",
"epoch": 49.03225806451613,
"global_step": 3040,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.65,
"learning_rate": 9.978494623655915e-06,
"loss": 0.7609,
"step": 40
},
{
"epoch": 0.65,
"eval_loss": 0.7705183029174805,
"eval_micro_f1": 0.6220095688772463,
"eval_runtime": 5.0386,
"eval_samples_per_second": 165.918,
"step": 40
},
{
"epoch": 1.29,
"learning_rate": 9.95698924731183e-06,
"loss": 0.7133,
"step": 80
},
{
"epoch": 1.29,
"eval_loss": 0.7154462337493896,
"eval_micro_f1": 0.6244019133748512,
"eval_runtime": 2.5719,
"eval_samples_per_second": 325.049,
"step": 80
},
{
"epoch": 1.94,
"learning_rate": 9.935483870967742e-06,
"loss": 0.6324,
"step": 120
},
{
"epoch": 1.94,
"eval_loss": 0.657606840133667,
"eval_micro_f1": 0.759569377489522,
"eval_runtime": 2.5725,
"eval_samples_per_second": 324.971,
"step": 120
},
{
"epoch": 2.58,
"learning_rate": 9.913978494623658e-06,
"loss": 0.5726,
"step": 160
},
{
"epoch": 2.58,
"eval_loss": 0.6323938965797424,
"eval_micro_f1": 0.7511961717479053,
"eval_runtime": 2.5886,
"eval_samples_per_second": 322.959,
"step": 160
},
{
"epoch": 3.23,
"learning_rate": 9.89247311827957e-06,
"loss": 0.5319,
"step": 200
},
{
"epoch": 3.23,
"eval_loss": 0.6058392524719238,
"eval_micro_f1": 0.7822966502167676,
"eval_runtime": 2.5691,
"eval_samples_per_second": 325.4,
"step": 200
},
{
"epoch": 3.87,
"learning_rate": 9.870967741935485e-06,
"loss": 0.4986,
"step": 240
},
{
"epoch": 3.87,
"eval_loss": 0.5896583199501038,
"eval_micro_f1": 0.7882775114607795,
"eval_runtime": 2.5838,
"eval_samples_per_second": 323.554,
"step": 240
},
{
"epoch": 4.52,
"learning_rate": 9.8494623655914e-06,
"loss": 0.4643,
"step": 280
},
{
"epoch": 4.52,
"eval_loss": 0.5731987357139587,
"eval_micro_f1": 0.7858851669631748,
"eval_runtime": 2.573,
"eval_samples_per_second": 324.914,
"step": 280
},
{
"epoch": 5.16,
"learning_rate": 9.827956989247312e-06,
"loss": 0.4218,
"step": 320
},
{
"epoch": 5.16,
"eval_loss": 0.5619742274284363,
"eval_micro_f1": 0.7870813392119771,
"eval_runtime": 2.5828,
"eval_samples_per_second": 323.679,
"step": 320
},
{
"epoch": 5.81,
"learning_rate": 9.806451612903226e-06,
"loss": 0.3839,
"step": 360
},
{
"epoch": 5.81,
"eval_loss": 0.560495138168335,
"eval_micro_f1": 0.7739234444751508,
"eval_runtime": 2.5822,
"eval_samples_per_second": 323.761,
"step": 360
},
{
"epoch": 6.45,
"learning_rate": 9.78494623655914e-06,
"loss": 0.3533,
"step": 400
},
{
"epoch": 6.45,
"eval_loss": 0.5423776507377625,
"eval_micro_f1": 0.7799043057191627,
"eval_runtime": 5.1636,
"eval_samples_per_second": 161.902,
"step": 400
},
{
"epoch": 7.1,
"learning_rate": 9.763440860215055e-06,
"loss": 0.3185,
"step": 440
},
{
"epoch": 7.1,
"eval_loss": 0.5292896032333374,
"eval_micro_f1": 0.8110047841880251,
"eval_runtime": 2.5902,
"eval_samples_per_second": 322.751,
"step": 440
},
{
"epoch": 7.74,
"learning_rate": 9.74193548387097e-06,
"loss": 0.2787,
"step": 480
},
{
"epoch": 7.74,
"eval_loss": 0.5203776955604553,
"eval_micro_f1": 0.8038277506952107,
"eval_runtime": 2.5776,
"eval_samples_per_second": 324.338,
"step": 480
},
{
"epoch": 8.39,
"learning_rate": 9.720430107526882e-06,
"loss": 0.2826,
"step": 520
},
{
"epoch": 8.39,
"eval_loss": 0.5275840163230896,
"eval_micro_f1": 0.7930622004559891,
"eval_runtime": 5.4507,
"eval_samples_per_second": 153.374,
"step": 520
},
{
"epoch": 9.03,
"learning_rate": 9.698924731182796e-06,
"loss": 0.2484,
"step": 560
},
{
"epoch": 9.03,
"eval_loss": 0.5161808133125305,
"eval_micro_f1": 0.8026315784464082,
"eval_runtime": 2.5785,
"eval_samples_per_second": 324.22,
"step": 560
},
{
"epoch": 9.68,
"learning_rate": 9.67741935483871e-06,
"loss": 0.235,
"step": 600
},
{
"epoch": 9.68,
"eval_loss": 0.5403562784194946,
"eval_micro_f1": 0.7679425832311388,
"eval_runtime": 2.5747,
"eval_samples_per_second": 324.693,
"step": 600
},
{
"epoch": 10.32,
"learning_rate": 9.655913978494625e-06,
"loss": 0.243,
"step": 640
},
{
"epoch": 10.32,
"eval_loss": 0.5164055824279785,
"eval_micro_f1": 0.7906698559583843,
"eval_runtime": 2.5788,
"eval_samples_per_second": 324.187,
"step": 640
},
{
"epoch": 10.97,
"learning_rate": 9.634408602150539e-06,
"loss": 0.2209,
"step": 680
},
{
"epoch": 10.97,
"eval_loss": 0.5173386931419373,
"eval_micro_f1": 0.759569377489522,
"eval_runtime": 2.5761,
"eval_samples_per_second": 324.527,
"step": 680
},
{
"epoch": 11.61,
"learning_rate": 9.612903225806453e-06,
"loss": 0.2149,
"step": 720
},
{
"epoch": 11.61,
"eval_loss": 0.5052957534790039,
"eval_micro_f1": 0.7894736837095819,
"eval_runtime": 2.6571,
"eval_samples_per_second": 314.625,
"step": 720
},
{
"epoch": 12.26,
"learning_rate": 9.591397849462366e-06,
"loss": 0.2225,
"step": 760
},
{
"epoch": 12.26,
"eval_loss": 0.5145884156227112,
"eval_micro_f1": 0.7846889947143724,
"eval_runtime": 2.5782,
"eval_samples_per_second": 324.252,
"step": 760
},
{
"epoch": 12.9,
"learning_rate": 9.56989247311828e-06,
"loss": 0.1907,
"step": 800
},
{
"epoch": 12.9,
"eval_loss": 0.5195938348770142,
"eval_micro_f1": 0.7691387554799413,
"eval_runtime": 2.5773,
"eval_samples_per_second": 324.375,
"step": 800
},
{
"epoch": 13.55,
"learning_rate": 9.548387096774195e-06,
"loss": 0.1792,
"step": 840
},
{
"epoch": 13.55,
"eval_loss": 0.5062447190284729,
"eval_micro_f1": 0.777511961221558,
"eval_runtime": 2.572,
"eval_samples_per_second": 325.035,
"step": 840
},
{
"epoch": 14.19,
"learning_rate": 9.526881720430107e-06,
"loss": 0.1996,
"step": 880
},
{
"epoch": 14.19,
"eval_loss": 0.50869220495224,
"eval_micro_f1": 0.7870813392119771,
"eval_runtime": 2.6593,
"eval_samples_per_second": 314.373,
"step": 880
},
{
"epoch": 14.84,
"learning_rate": 9.505376344086023e-06,
"loss": 0.218,
"step": 920
},
{
"epoch": 14.84,
"eval_loss": 0.5089221596717834,
"eval_micro_f1": 0.7787081334703604,
"eval_runtime": 4.5287,
"eval_samples_per_second": 184.599,
"step": 920
},
{
"epoch": 15.48,
"learning_rate": 9.483870967741936e-06,
"loss": 0.2129,
"step": 960
},
{
"epoch": 15.48,
"eval_loss": 0.49846795201301575,
"eval_micro_f1": 0.7966507172023963,
"eval_runtime": 2.5705,
"eval_samples_per_second": 325.232,
"step": 960
},
{
"epoch": 16.13,
"learning_rate": 9.46236559139785e-06,
"loss": 0.1791,
"step": 1000
},
{
"epoch": 16.13,
"eval_loss": 0.5086433291435242,
"eval_micro_f1": 0.7930622004559891,
"eval_runtime": 7.0132,
"eval_samples_per_second": 119.204,
"step": 1000
},
{
"epoch": 16.77,
"learning_rate": 9.440860215053764e-06,
"loss": 0.1915,
"step": 1040
},
{
"epoch": 16.77,
"eval_loss": 0.49868449568748474,
"eval_micro_f1": 0.7882775114607795,
"eval_runtime": 2.5701,
"eval_samples_per_second": 325.281,
"step": 1040
},
{
"epoch": 17.42,
"learning_rate": 9.419354838709677e-06,
"loss": 0.1941,
"step": 1080
},
{
"epoch": 17.42,
"eval_loss": 0.4898798167705536,
"eval_micro_f1": 0.8122009564368274,
"eval_runtime": 4.5389,
"eval_samples_per_second": 184.184,
"step": 1080
},
{
"epoch": 18.06,
"learning_rate": 9.397849462365593e-06,
"loss": 0.1925,
"step": 1120
},
{
"epoch": 18.06,
"eval_loss": 0.5056890249252319,
"eval_micro_f1": 0.7882775114607795,
"eval_runtime": 2.5841,
"eval_samples_per_second": 323.522,
"step": 1120
},
{
"epoch": 18.71,
"learning_rate": 9.376344086021506e-06,
"loss": 0.1748,
"step": 1160
},
{
"epoch": 18.71,
"eval_loss": 0.4988904297351837,
"eval_micro_f1": 0.7906698559583843,
"eval_runtime": 4.544,
"eval_samples_per_second": 183.978,
"step": 1160
},
{
"epoch": 19.35,
"learning_rate": 9.35483870967742e-06,
"loss": 0.182,
"step": 1200
},
{
"epoch": 19.35,
"eval_loss": 0.5048949122428894,
"eval_micro_f1": 0.7763157889727555,
"eval_runtime": 2.5956,
"eval_samples_per_second": 322.082,
"step": 1200
},
{
"epoch": 20.0,
"learning_rate": 9.333333333333334e-06,
"loss": 0.1974,
"step": 1240
},
{
"epoch": 20.0,
"eval_loss": 0.5167320370674133,
"eval_micro_f1": 0.761961721987127,
"eval_runtime": 3.8728,
"eval_samples_per_second": 215.864,
"step": 1240
},
{
"epoch": 20.65,
"learning_rate": 9.311827956989249e-06,
"loss": 0.2053,
"step": 1280
},
{
"epoch": 20.65,
"eval_loss": 0.5148649215698242,
"eval_micro_f1": 0.7727272722263484,
"eval_runtime": 4.523,
"eval_samples_per_second": 184.832,
"step": 1280
},
{
"epoch": 21.29,
"learning_rate": 9.290322580645163e-06,
"loss": 0.1591,
"step": 1320
},
{
"epoch": 21.29,
"eval_loss": 0.5132973194122314,
"eval_micro_f1": 0.7799043057191627,
"eval_runtime": 2.5783,
"eval_samples_per_second": 324.249,
"step": 1320
},
{
"epoch": 21.94,
"learning_rate": 9.268817204301076e-06,
"loss": 0.1852,
"step": 1360
},
{
"epoch": 21.94,
"eval_loss": 0.49313730001449585,
"eval_micro_f1": 0.805023922944013,
"eval_runtime": 4.4012,
"eval_samples_per_second": 189.95,
"step": 1360
},
{
"epoch": 22.58,
"learning_rate": 9.24731182795699e-06,
"loss": 0.1705,
"step": 1400
},
{
"epoch": 22.58,
"eval_loss": 0.5020002722740173,
"eval_micro_f1": 0.7954545449535938,
"eval_runtime": 4.3771,
"eval_samples_per_second": 190.994,
"step": 1400
},
{
"epoch": 23.23,
"learning_rate": 9.225806451612904e-06,
"loss": 0.1774,
"step": 1440
},
{
"epoch": 23.23,
"eval_loss": 0.5008808970451355,
"eval_micro_f1": 0.805023922944013,
"eval_runtime": 6.6994,
"eval_samples_per_second": 124.788,
"step": 1440
},
{
"epoch": 23.87,
"learning_rate": 9.204301075268819e-06,
"loss": 0.1843,
"step": 1480
},
{
"epoch": 23.87,
"eval_loss": 0.4905719757080078,
"eval_micro_f1": 0.805023922944013,
"eval_runtime": 4.5018,
"eval_samples_per_second": 185.705,
"step": 1480
},
{
"epoch": 24.52,
"learning_rate": 9.182795698924733e-06,
"loss": 0.1739,
"step": 1520
},
{
"epoch": 24.52,
"eval_loss": 0.49693962931632996,
"eval_micro_f1": 0.78349282246557,
"eval_runtime": 2.8168,
"eval_samples_per_second": 296.786,
"step": 1520
},
{
"epoch": 25.16,
"learning_rate": 9.161290322580645e-06,
"loss": 0.1972,
"step": 1560
},
{
"epoch": 25.16,
"eval_loss": 0.5002193450927734,
"eval_micro_f1": 0.7870813392119771,
"eval_runtime": 4.442,
"eval_samples_per_second": 188.205,
"step": 1560
},
{
"epoch": 25.81,
"learning_rate": 9.13978494623656e-06,
"loss": 0.1789,
"step": 1600
},
{
"epoch": 25.81,
"eval_loss": 0.49293604493141174,
"eval_micro_f1": 0.7870813392119771,
"eval_runtime": 2.5791,
"eval_samples_per_second": 324.147,
"step": 1600
},
{
"epoch": 26.45,
"learning_rate": 9.118279569892474e-06,
"loss": 0.1647,
"step": 1640
},
{
"epoch": 26.45,
"eval_loss": 0.5105385184288025,
"eval_micro_f1": 0.7787081334703604,
"eval_runtime": 4.3949,
"eval_samples_per_second": 190.221,
"step": 1640
},
{
"epoch": 27.1,
"learning_rate": 9.096774193548388e-06,
"loss": 0.1606,
"step": 1680
},
{
"epoch": 27.1,
"eval_loss": 0.5043686032295227,
"eval_micro_f1": 0.7822966502167676,
"eval_runtime": 2.574,
"eval_samples_per_second": 324.785,
"step": 1680
},
{
"epoch": 27.74,
"learning_rate": 9.075268817204301e-06,
"loss": 0.1828,
"step": 1720
},
{
"epoch": 27.74,
"eval_loss": 0.49614059925079346,
"eval_micro_f1": 0.7799043057191627,
"eval_runtime": 3.1362,
"eval_samples_per_second": 266.568,
"step": 1720
},
{
"epoch": 28.39,
"learning_rate": 9.053763440860215e-06,
"loss": 0.1655,
"step": 1760
},
{
"epoch": 28.39,
"eval_loss": 0.49682486057281494,
"eval_micro_f1": 0.7846889947143724,
"eval_runtime": 4.4243,
"eval_samples_per_second": 188.957,
"step": 1760
},
{
"epoch": 29.03,
"learning_rate": 9.03225806451613e-06,
"loss": 0.1826,
"step": 1800
},
{
"epoch": 29.03,
"eval_loss": 0.494783490896225,
"eval_micro_f1": 0.7930622004559891,
"eval_runtime": 2.5934,
"eval_samples_per_second": 322.356,
"step": 1800
},
{
"epoch": 29.68,
"learning_rate": 9.010752688172044e-06,
"loss": 0.1573,
"step": 1840
},
{
"epoch": 29.68,
"eval_loss": 0.49120277166366577,
"eval_micro_f1": 0.7954545449535938,
"eval_runtime": 4.5606,
"eval_samples_per_second": 183.308,
"step": 1840
},
{
"epoch": 30.32,
"learning_rate": 8.989247311827958e-06,
"loss": 0.1531,
"step": 1880
},
{
"epoch": 30.32,
"eval_loss": 0.49332737922668457,
"eval_micro_f1": 0.7978468894511987,
"eval_runtime": 2.5787,
"eval_samples_per_second": 324.199,
"step": 1880
},
{
"epoch": 30.97,
"learning_rate": 8.967741935483871e-06,
"loss": 0.1648,
"step": 1920
},
{
"epoch": 30.97,
"eval_loss": 0.49322041869163513,
"eval_micro_f1": 0.7942583727047915,
"eval_runtime": 6.8429,
"eval_samples_per_second": 122.171,
"step": 1920
},
{
"epoch": 31.61,
"learning_rate": 8.946236559139785e-06,
"loss": 0.1524,
"step": 1960
},
{
"epoch": 31.61,
"eval_loss": 0.49640557169914246,
"eval_micro_f1": 0.7906698559583843,
"eval_runtime": 4.4123,
"eval_samples_per_second": 189.472,
"step": 1960
},
{
"epoch": 32.26,
"learning_rate": 8.9247311827957e-06,
"loss": 0.18,
"step": 2000
},
{
"epoch": 32.26,
"eval_loss": 0.48521941900253296,
"eval_micro_f1": 0.801435406197606,
"eval_runtime": 6.7049,
"eval_samples_per_second": 124.686,
"step": 2000
},
{
"epoch": 32.9,
"learning_rate": 8.903225806451614e-06,
"loss": 0.1374,
"step": 2040
},
{
"epoch": 32.9,
"eval_loss": 0.49140119552612305,
"eval_micro_f1": 0.7894736837095819,
"eval_runtime": 6.3223,
"eval_samples_per_second": 132.23,
"step": 2040
},
{
"epoch": 33.55,
"learning_rate": 8.881720430107528e-06,
"loss": 0.1694,
"step": 2080
},
{
"epoch": 33.55,
"eval_loss": 0.4927099347114563,
"eval_micro_f1": 0.7882775114607795,
"eval_runtime": 3.7769,
"eval_samples_per_second": 221.347,
"step": 2080
},
{
"epoch": 34.19,
"learning_rate": 8.86021505376344e-06,
"loss": 0.1601,
"step": 2120
},
{
"epoch": 34.19,
"eval_loss": 0.4943098723888397,
"eval_micro_f1": 0.78349282246557,
"eval_runtime": 4.9398,
"eval_samples_per_second": 169.237,
"step": 2120
},
{
"epoch": 34.84,
"learning_rate": 8.838709677419357e-06,
"loss": 0.1319,
"step": 2160
},
{
"epoch": 34.84,
"eval_loss": 0.4894709587097168,
"eval_micro_f1": 0.7942583727047915,
"eval_runtime": 5.4431,
"eval_samples_per_second": 153.588,
"step": 2160
},
{
"epoch": 35.48,
"learning_rate": 8.81720430107527e-06,
"loss": 0.1273,
"step": 2200
},
{
"epoch": 35.48,
"eval_loss": 0.4995502531528473,
"eval_micro_f1": 0.7858851669631748,
"eval_runtime": 4.4194,
"eval_samples_per_second": 189.168,
"step": 2200
},
{
"epoch": 36.13,
"learning_rate": 8.795698924731184e-06,
"loss": 0.1674,
"step": 2240
},
{
"epoch": 36.13,
"eval_loss": 0.49648988246917725,
"eval_micro_f1": 0.7870813392119771,
"eval_runtime": 5.2767,
"eval_samples_per_second": 158.432,
"step": 2240
},
{
"epoch": 36.77,
"learning_rate": 8.774193548387098e-06,
"loss": 0.1789,
"step": 2280
},
{
"epoch": 36.77,
"eval_loss": 0.5074620842933655,
"eval_micro_f1": 0.7523923439967076,
"eval_runtime": 2.5902,
"eval_samples_per_second": 322.751,
"step": 2280
},
{
"epoch": 37.42,
"learning_rate": 8.75268817204301e-06,
"loss": 0.1752,
"step": 2320
},
{
"epoch": 37.42,
"eval_loss": 0.49217045307159424,
"eval_micro_f1": 0.7870813392119771,
"eval_runtime": 4.548,
"eval_samples_per_second": 183.818,
"step": 2320
},
{
"epoch": 38.06,
"learning_rate": 8.731182795698927e-06,
"loss": 0.1476,
"step": 2360
},
{
"epoch": 38.06,
"eval_loss": 0.49000826478004456,
"eval_micro_f1": 0.7942583727047915,
"eval_runtime": 6.9763,
"eval_samples_per_second": 119.835,
"step": 2360
},
{
"epoch": 38.71,
"learning_rate": 8.70967741935484e-06,
"loss": 0.1868,
"step": 2400
},
{
"epoch": 38.71,
"eval_loss": 0.49217844009399414,
"eval_micro_f1": 0.7918660282071867,
"eval_runtime": 3.4731,
"eval_samples_per_second": 240.707,
"step": 2400
},
{
"epoch": 39.35,
"learning_rate": 8.688172043010754e-06,
"loss": 0.1714,
"step": 2440
},
{
"epoch": 39.35,
"eval_loss": 0.49292442202568054,
"eval_micro_f1": 0.7930622004559891,
"eval_runtime": 4.4418,
"eval_samples_per_second": 188.211,
"step": 2440
},
{
"epoch": 40.0,
"learning_rate": 8.666666666666668e-06,
"loss": 0.1466,
"step": 2480
},
{
"epoch": 40.0,
"eval_loss": 0.4939003586769104,
"eval_micro_f1": 0.7906698559583843,
"eval_runtime": 7.0534,
"eval_samples_per_second": 118.524,
"step": 2480
},
{
"epoch": 40.65,
"learning_rate": 8.64516129032258e-06,
"loss": 0.1461,
"step": 2520
},
{
"epoch": 40.65,
"eval_loss": 0.5058996677398682,
"eval_micro_f1": 0.7870813392119771,
"eval_runtime": 5.5612,
"eval_samples_per_second": 150.327,
"step": 2520
},
{
"epoch": 41.29,
"learning_rate": 8.623655913978495e-06,
"loss": 0.1778,
"step": 2560
},
{
"epoch": 41.29,
"eval_loss": 0.486485093832016,
"eval_micro_f1": 0.8026315784464082,
"eval_runtime": 4.3478,
"eval_samples_per_second": 192.28,
"step": 2560
},
{
"epoch": 41.94,
"learning_rate": 8.602150537634409e-06,
"loss": 0.1675,
"step": 2600
},
{
"epoch": 41.94,
"eval_loss": 0.5116675496101379,
"eval_micro_f1": 0.7739234444751508,
"eval_runtime": 2.6063,
"eval_samples_per_second": 320.761,
"step": 2600
},
{
"epoch": 42.58,
"learning_rate": 8.580645161290323e-06,
"loss": 0.1468,
"step": 2640
},
{
"epoch": 42.58,
"eval_loss": 0.49054816365242004,
"eval_micro_f1": 0.7882775114607795,
"eval_runtime": 4.4425,
"eval_samples_per_second": 188.184,
"step": 2640
},
{
"epoch": 43.23,
"learning_rate": 8.559139784946238e-06,
"loss": 0.1738,
"step": 2680
},
{
"epoch": 43.23,
"eval_loss": 0.4893950819969177,
"eval_micro_f1": 0.7870813392119771,
"eval_runtime": 4.4169,
"eval_samples_per_second": 189.272,
"step": 2680
},
{
"epoch": 43.87,
"learning_rate": 8.537634408602152e-06,
"loss": 0.1579,
"step": 2720
},
{
"epoch": 43.87,
"eval_loss": 0.4949648082256317,
"eval_micro_f1": 0.7858851669631748,
"eval_runtime": 5.9011,
"eval_samples_per_second": 141.669,
"step": 2720
},
{
"epoch": 44.52,
"learning_rate": 8.516129032258065e-06,
"loss": 0.1556,
"step": 2760
},
{
"epoch": 44.52,
"eval_loss": 0.48870715498924255,
"eval_micro_f1": 0.7918660282071867,
"eval_runtime": 7.1164,
"eval_samples_per_second": 117.475,
"step": 2760
},
{
"epoch": 45.16,
"learning_rate": 8.494623655913979e-06,
"loss": 0.169,
"step": 2800
},
{
"epoch": 45.16,
"eval_loss": 0.4876663088798523,
"eval_micro_f1": 0.7978468894511987,
"eval_runtime": 2.5733,
"eval_samples_per_second": 324.876,
"step": 2800
},
{
"epoch": 45.81,
"learning_rate": 8.473118279569893e-06,
"loss": 0.1659,
"step": 2840
},
{
"epoch": 45.81,
"eval_loss": 0.48851579427719116,
"eval_micro_f1": 0.7954545449535938,
"eval_runtime": 7.1719,
"eval_samples_per_second": 116.566,
"step": 2840
},
{
"epoch": 46.45,
"learning_rate": 8.451612903225808e-06,
"loss": 0.1621,
"step": 2880
},
{
"epoch": 46.45,
"eval_loss": 0.48815056681632996,
"eval_micro_f1": 0.7894736837095819,
"eval_runtime": 5.0889,
"eval_samples_per_second": 164.28,
"step": 2880
},
{
"epoch": 47.1,
"learning_rate": 8.430107526881722e-06,
"loss": 0.1667,
"step": 2920
},
{
"epoch": 47.1,
"eval_loss": 0.49084004759788513,
"eval_micro_f1": 0.78349282246557,
"eval_runtime": 2.5855,
"eval_samples_per_second": 323.336,
"step": 2920
},
{
"epoch": 47.74,
"learning_rate": 8.408602150537634e-06,
"loss": 0.1697,
"step": 2960
},
{
"epoch": 47.74,
"eval_loss": 0.4899657368659973,
"eval_micro_f1": 0.78349282246557,
"eval_runtime": 7.1881,
"eval_samples_per_second": 116.303,
"step": 2960
},
{
"epoch": 48.39,
"learning_rate": 8.387096774193549e-06,
"loss": 0.1446,
"step": 3000
},
{
"epoch": 48.39,
"eval_loss": 0.47679710388183594,
"eval_micro_f1": 0.801435406197606,
"eval_runtime": 2.5746,
"eval_samples_per_second": 324.715,
"step": 3000
},
{
"epoch": 49.03,
"learning_rate": 8.365591397849463e-06,
"loss": 0.1364,
"step": 3040
},
{
"epoch": 49.03,
"eval_loss": 0.4702768921852112,
"eval_micro_f1": 0.8181818176808394,
"eval_runtime": 4.5292,
"eval_samples_per_second": 184.582,
"step": 3040
}
],
"max_steps": 18600,
"num_train_epochs": 300,
"total_flos": 4154829689883924,
"trial_name": null,
"trial_params": null
}