{
  "best_metric": 0.8181818176808394,
  "best_model_checkpoint": "../out/wnut/ner-bert-spanclass-dice/checkpoints/checkpoint-3040",
  "epoch": 49.03225806451613,
  "global_step": 3040,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.65,
      "learning_rate": 9.978494623655915e-06,
      "loss": 0.7609,
      "step": 40
    },
    {
      "epoch": 0.65,
      "eval_loss": 0.7705183029174805,
      "eval_micro_f1": 0.6220095688772463,
      "eval_runtime": 5.0386,
      "eval_samples_per_second": 165.918,
      "step": 40
    },
    {
      "epoch": 1.29,
      "learning_rate": 9.95698924731183e-06,
      "loss": 0.7133,
      "step": 80
    },
    {
      "epoch": 1.29,
      "eval_loss": 0.7154462337493896,
      "eval_micro_f1": 0.6244019133748512,
      "eval_runtime": 2.5719,
      "eval_samples_per_second": 325.049,
      "step": 80
    },
    {
      "epoch": 1.94,
      "learning_rate": 9.935483870967742e-06,
      "loss": 0.6324,
      "step": 120
    },
    {
      "epoch": 1.94,
      "eval_loss": 0.657606840133667,
      "eval_micro_f1": 0.759569377489522,
      "eval_runtime": 2.5725,
      "eval_samples_per_second": 324.971,
      "step": 120
    },
    {
      "epoch": 2.58,
      "learning_rate": 9.913978494623658e-06,
      "loss": 0.5726,
      "step": 160
    },
    {
      "epoch": 2.58,
      "eval_loss": 0.6323938965797424,
      "eval_micro_f1": 0.7511961717479053,
      "eval_runtime": 2.5886,
      "eval_samples_per_second": 322.959,
      "step": 160
    },
    {
      "epoch": 3.23,
      "learning_rate": 9.89247311827957e-06,
      "loss": 0.5319,
      "step": 200
    },
    {
      "epoch": 3.23,
      "eval_loss": 0.6058392524719238,
      "eval_micro_f1": 0.7822966502167676,
      "eval_runtime": 2.5691,
      "eval_samples_per_second": 325.4,
      "step": 200
    },
    {
      "epoch": 3.87,
      "learning_rate": 9.870967741935485e-06,
      "loss": 0.4986,
      "step": 240
    },
    {
      "epoch": 3.87,
      "eval_loss": 0.5896583199501038,
      "eval_micro_f1": 0.7882775114607795,
      "eval_runtime": 2.5838,
      "eval_samples_per_second": 323.554,
      "step": 240
    },
    {
      "epoch": 4.52,
      "learning_rate": 9.8494623655914e-06,
      "loss": 0.4643,
      "step": 280
    },
    {
      "epoch": 4.52,
      "eval_loss": 0.5731987357139587,
      "eval_micro_f1": 0.7858851669631748,
      "eval_runtime": 2.573,
      "eval_samples_per_second": 324.914,
      "step": 280
    },
    {
      "epoch": 5.16,
      "learning_rate": 9.827956989247312e-06,
      "loss": 0.4218,
      "step": 320
    },
    {
      "epoch": 5.16,
      "eval_loss": 0.5619742274284363,
      "eval_micro_f1": 0.7870813392119771,
      "eval_runtime": 2.5828,
      "eval_samples_per_second": 323.679,
      "step": 320
    },
    {
      "epoch": 5.81,
      "learning_rate": 9.806451612903226e-06,
      "loss": 0.3839,
      "step": 360
    },
    {
      "epoch": 5.81,
      "eval_loss": 0.560495138168335,
      "eval_micro_f1": 0.7739234444751508,
      "eval_runtime": 2.5822,
      "eval_samples_per_second": 323.761,
      "step": 360
    },
    {
      "epoch": 6.45,
      "learning_rate": 9.78494623655914e-06,
      "loss": 0.3533,
      "step": 400
    },
    {
      "epoch": 6.45,
      "eval_loss": 0.5423776507377625,
      "eval_micro_f1": 0.7799043057191627,
      "eval_runtime": 5.1636,
      "eval_samples_per_second": 161.902,
      "step": 400
    },
    {
      "epoch": 7.1,
      "learning_rate": 9.763440860215055e-06,
      "loss": 0.3185,
      "step": 440
    },
    {
      "epoch": 7.1,
      "eval_loss": 0.5292896032333374,
      "eval_micro_f1": 0.8110047841880251,
      "eval_runtime": 2.5902,
      "eval_samples_per_second": 322.751,
      "step": 440
    },
    {
      "epoch": 7.74,
      "learning_rate": 9.74193548387097e-06,
      "loss": 0.2787,
      "step": 480
    },
    {
      "epoch": 7.74,
      "eval_loss": 0.5203776955604553,
      "eval_micro_f1": 0.8038277506952107,
      "eval_runtime": 2.5776,
      "eval_samples_per_second": 324.338,
      "step": 480
    },
    {
      "epoch": 8.39,
      "learning_rate": 9.720430107526882e-06,
      "loss": 0.2826,
      "step": 520
    },
    {
      "epoch": 8.39,
      "eval_loss": 0.5275840163230896,
      "eval_micro_f1": 0.7930622004559891,
      "eval_runtime": 5.4507,
      "eval_samples_per_second": 153.374,
      "step": 520
    },
    {
      "epoch": 9.03,
      "learning_rate": 9.698924731182796e-06,
      "loss": 0.2484,
      "step": 560
    },
    {
      "epoch": 9.03,
      "eval_loss": 0.5161808133125305,
      "eval_micro_f1": 0.8026315784464082,
      "eval_runtime": 2.5785,
      "eval_samples_per_second": 324.22,
      "step": 560
    },
    {
      "epoch": 9.68,
      "learning_rate": 9.67741935483871e-06,
      "loss": 0.235,
      "step": 600
    },
    {
      "epoch": 9.68,
      "eval_loss": 0.5403562784194946,
      "eval_micro_f1": 0.7679425832311388,
      "eval_runtime": 2.5747,
      "eval_samples_per_second": 324.693,
      "step": 600
    },
    {
      "epoch": 10.32,
      "learning_rate": 9.655913978494625e-06,
      "loss": 0.243,
      "step": 640
    },
    {
      "epoch": 10.32,
      "eval_loss": 0.5164055824279785,
      "eval_micro_f1": 0.7906698559583843,
      "eval_runtime": 2.5788,
      "eval_samples_per_second": 324.187,
      "step": 640
    },
    {
      "epoch": 10.97,
      "learning_rate": 9.634408602150539e-06,
      "loss": 0.2209,
      "step": 680
    },
    {
      "epoch": 10.97,
      "eval_loss": 0.5173386931419373,
      "eval_micro_f1": 0.759569377489522,
      "eval_runtime": 2.5761,
      "eval_samples_per_second": 324.527,
      "step": 680
    },
    {
      "epoch": 11.61,
      "learning_rate": 9.612903225806453e-06,
      "loss": 0.2149,
      "step": 720
    },
    {
      "epoch": 11.61,
      "eval_loss": 0.5052957534790039,
      "eval_micro_f1": 0.7894736837095819,
      "eval_runtime": 2.6571,
      "eval_samples_per_second": 314.625,
      "step": 720
    },
    {
      "epoch": 12.26,
      "learning_rate": 9.591397849462366e-06,
      "loss": 0.2225,
      "step": 760
    },
    {
      "epoch": 12.26,
      "eval_loss": 0.5145884156227112,
      "eval_micro_f1": 0.7846889947143724,
      "eval_runtime": 2.5782,
      "eval_samples_per_second": 324.252,
      "step": 760
    },
    {
      "epoch": 12.9,
      "learning_rate": 9.56989247311828e-06,
      "loss": 0.1907,
      "step": 800
    },
    {
      "epoch": 12.9,
      "eval_loss": 0.5195938348770142,
      "eval_micro_f1": 0.7691387554799413,
      "eval_runtime": 2.5773,
      "eval_samples_per_second": 324.375,
      "step": 800
    },
    {
      "epoch": 13.55,
      "learning_rate": 9.548387096774195e-06,
      "loss": 0.1792,
      "step": 840
    },
    {
      "epoch": 13.55,
      "eval_loss": 0.5062447190284729,
      "eval_micro_f1": 0.777511961221558,
      "eval_runtime": 2.572,
      "eval_samples_per_second": 325.035,
      "step": 840
    },
    {
      "epoch": 14.19,
      "learning_rate": 9.526881720430107e-06,
      "loss": 0.1996,
      "step": 880
    },
    {
      "epoch": 14.19,
      "eval_loss": 0.50869220495224,
      "eval_micro_f1": 0.7870813392119771,
      "eval_runtime": 2.6593,
      "eval_samples_per_second": 314.373,
      "step": 880
    },
    {
      "epoch": 14.84,
      "learning_rate": 9.505376344086023e-06,
      "loss": 0.218,
      "step": 920
    },
    {
      "epoch": 14.84,
      "eval_loss": 0.5089221596717834,
      "eval_micro_f1": 0.7787081334703604,
      "eval_runtime": 4.5287,
      "eval_samples_per_second": 184.599,
      "step": 920
    },
    {
      "epoch": 15.48,
      "learning_rate": 9.483870967741936e-06,
      "loss": 0.2129,
      "step": 960
    },
    {
      "epoch": 15.48,
      "eval_loss": 0.49846795201301575,
      "eval_micro_f1": 0.7966507172023963,
      "eval_runtime": 2.5705,
      "eval_samples_per_second": 325.232,
      "step": 960
    },
    {
      "epoch": 16.13,
      "learning_rate": 9.46236559139785e-06,
      "loss": 0.1791,
      "step": 1000
    },
    {
      "epoch": 16.13,
      "eval_loss": 0.5086433291435242,
      "eval_micro_f1": 0.7930622004559891,
      "eval_runtime": 7.0132,
      "eval_samples_per_second": 119.204,
      "step": 1000
    },
    {
      "epoch": 16.77,
      "learning_rate": 9.440860215053764e-06,
      "loss": 0.1915,
      "step": 1040
    },
    {
      "epoch": 16.77,
      "eval_loss": 0.49868449568748474,
      "eval_micro_f1": 0.7882775114607795,
      "eval_runtime": 2.5701,
      "eval_samples_per_second": 325.281,
      "step": 1040
    },
    {
      "epoch": 17.42,
      "learning_rate": 9.419354838709677e-06,
      "loss": 0.1941,
      "step": 1080
    },
    {
      "epoch": 17.42,
      "eval_loss": 0.4898798167705536,
      "eval_micro_f1": 0.8122009564368274,
      "eval_runtime": 4.5389,
      "eval_samples_per_second": 184.184,
      "step": 1080
    },
    {
      "epoch": 18.06,
      "learning_rate": 9.397849462365593e-06,
      "loss": 0.1925,
      "step": 1120
    },
    {
      "epoch": 18.06,
      "eval_loss": 0.5056890249252319,
      "eval_micro_f1": 0.7882775114607795,
      "eval_runtime": 2.5841,
      "eval_samples_per_second": 323.522,
      "step": 1120
    },
    {
      "epoch": 18.71,
      "learning_rate": 9.376344086021506e-06,
      "loss": 0.1748,
      "step": 1160
    },
    {
      "epoch": 18.71,
      "eval_loss": 0.4988904297351837,
      "eval_micro_f1": 0.7906698559583843,
      "eval_runtime": 4.544,
      "eval_samples_per_second": 183.978,
      "step": 1160
    },
    {
      "epoch": 19.35,
      "learning_rate": 9.35483870967742e-06,
      "loss": 0.182,
      "step": 1200
    },
    {
      "epoch": 19.35,
      "eval_loss": 0.5048949122428894,
      "eval_micro_f1": 0.7763157889727555,
      "eval_runtime": 2.5956,
      "eval_samples_per_second": 322.082,
      "step": 1200
    },
    {
      "epoch": 20.0,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.1974,
      "step": 1240
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.5167320370674133,
      "eval_micro_f1": 0.761961721987127,
      "eval_runtime": 3.8728,
      "eval_samples_per_second": 215.864,
      "step": 1240
    },
    {
      "epoch": 20.65,
      "learning_rate": 9.311827956989249e-06,
      "loss": 0.2053,
      "step": 1280
    },
    {
      "epoch": 20.65,
      "eval_loss": 0.5148649215698242,
      "eval_micro_f1": 0.7727272722263484,
      "eval_runtime": 4.523,
      "eval_samples_per_second": 184.832,
      "step": 1280
    },
    {
      "epoch": 21.29,
      "learning_rate": 9.290322580645163e-06,
      "loss": 0.1591,
      "step": 1320
    },
    {
      "epoch": 21.29,
      "eval_loss": 0.5132973194122314,
      "eval_micro_f1": 0.7799043057191627,
      "eval_runtime": 2.5783,
      "eval_samples_per_second": 324.249,
      "step": 1320
    },
    {
      "epoch": 21.94,
      "learning_rate": 9.268817204301076e-06,
      "loss": 0.1852,
      "step": 1360
    },
    {
      "epoch": 21.94,
      "eval_loss": 0.49313730001449585,
      "eval_micro_f1": 0.805023922944013,
      "eval_runtime": 4.4012,
      "eval_samples_per_second": 189.95,
      "step": 1360
    },
    {
      "epoch": 22.58,
      "learning_rate": 9.24731182795699e-06,
      "loss": 0.1705,
      "step": 1400
    },
    {
      "epoch": 22.58,
      "eval_loss": 0.5020002722740173,
      "eval_micro_f1": 0.7954545449535938,
      "eval_runtime": 4.3771,
      "eval_samples_per_second": 190.994,
      "step": 1400
    },
    {
      "epoch": 23.23,
      "learning_rate": 9.225806451612904e-06,
      "loss": 0.1774,
      "step": 1440
    },
    {
      "epoch": 23.23,
      "eval_loss": 0.5008808970451355,
      "eval_micro_f1": 0.805023922944013,
      "eval_runtime": 6.6994,
      "eval_samples_per_second": 124.788,
      "step": 1440
    },
    {
      "epoch": 23.87,
      "learning_rate": 9.204301075268819e-06,
      "loss": 0.1843,
      "step": 1480
    },
    {
      "epoch": 23.87,
      "eval_loss": 0.4905719757080078,
      "eval_micro_f1": 0.805023922944013,
      "eval_runtime": 4.5018,
      "eval_samples_per_second": 185.705,
      "step": 1480
    },
    {
      "epoch": 24.52,
      "learning_rate": 9.182795698924733e-06,
      "loss": 0.1739,
      "step": 1520
    },
    {
      "epoch": 24.52,
      "eval_loss": 0.49693962931632996,
      "eval_micro_f1": 0.78349282246557,
      "eval_runtime": 2.8168,
      "eval_samples_per_second": 296.786,
      "step": 1520
    },
    {
      "epoch": 25.16,
      "learning_rate": 9.161290322580645e-06,
      "loss": 0.1972,
      "step": 1560
    },
    {
      "epoch": 25.16,
      "eval_loss": 0.5002193450927734,
      "eval_micro_f1": 0.7870813392119771,
      "eval_runtime": 4.442,
      "eval_samples_per_second": 188.205,
      "step": 1560
    },
    {
      "epoch": 25.81,
      "learning_rate": 9.13978494623656e-06,
      "loss": 0.1789,
      "step": 1600
    },
    {
      "epoch": 25.81,
      "eval_loss": 0.49293604493141174,
      "eval_micro_f1": 0.7870813392119771,
      "eval_runtime": 2.5791,
      "eval_samples_per_second": 324.147,
      "step": 1600
    },
    {
      "epoch": 26.45,
      "learning_rate": 9.118279569892474e-06,
      "loss": 0.1647,
      "step": 1640
    },
    {
      "epoch": 26.45,
      "eval_loss": 0.5105385184288025,
      "eval_micro_f1": 0.7787081334703604,
      "eval_runtime": 4.3949,
      "eval_samples_per_second": 190.221,
      "step": 1640
    },
    {
      "epoch": 27.1,
      "learning_rate": 9.096774193548388e-06,
      "loss": 0.1606,
      "step": 1680
    },
    {
      "epoch": 27.1,
      "eval_loss": 0.5043686032295227,
      "eval_micro_f1": 0.7822966502167676,
      "eval_runtime": 2.574,
      "eval_samples_per_second": 324.785,
      "step": 1680
    },
    {
      "epoch": 27.74,
      "learning_rate": 9.075268817204301e-06,
      "loss": 0.1828,
      "step": 1720
    },
    {
      "epoch": 27.74,
      "eval_loss": 0.49614059925079346,
      "eval_micro_f1": 0.7799043057191627,
      "eval_runtime": 3.1362,
      "eval_samples_per_second": 266.568,
      "step": 1720
    },
    {
      "epoch": 28.39,
      "learning_rate": 9.053763440860215e-06,
      "loss": 0.1655,
      "step": 1760
    },
    {
      "epoch": 28.39,
      "eval_loss": 0.49682486057281494,
      "eval_micro_f1": 0.7846889947143724,
      "eval_runtime": 4.4243,
      "eval_samples_per_second": 188.957,
      "step": 1760
    },
    {
      "epoch": 29.03,
      "learning_rate": 9.03225806451613e-06,
      "loss": 0.1826,
      "step": 1800
    },
    {
      "epoch": 29.03,
      "eval_loss": 0.494783490896225,
      "eval_micro_f1": 0.7930622004559891,
      "eval_runtime": 2.5934,
      "eval_samples_per_second": 322.356,
      "step": 1800
    },
    {
      "epoch": 29.68,
      "learning_rate": 9.010752688172044e-06,
      "loss": 0.1573,
      "step": 1840
    },
    {
      "epoch": 29.68,
      "eval_loss": 0.49120277166366577,
      "eval_micro_f1": 0.7954545449535938,
      "eval_runtime": 4.5606,
      "eval_samples_per_second": 183.308,
      "step": 1840
    },
    {
      "epoch": 30.32,
      "learning_rate": 8.989247311827958e-06,
      "loss": 0.1531,
      "step": 1880
    },
    {
      "epoch": 30.32,
      "eval_loss": 0.49332737922668457,
      "eval_micro_f1": 0.7978468894511987,
      "eval_runtime": 2.5787,
      "eval_samples_per_second": 324.199,
      "step": 1880
    },
    {
      "epoch": 30.97,
      "learning_rate": 8.967741935483871e-06,
      "loss": 0.1648,
      "step": 1920
    },
    {
      "epoch": 30.97,
      "eval_loss": 0.49322041869163513,
      "eval_micro_f1": 0.7942583727047915,
      "eval_runtime": 6.8429,
      "eval_samples_per_second": 122.171,
      "step": 1920
    },
    {
      "epoch": 31.61,
      "learning_rate": 8.946236559139785e-06,
      "loss": 0.1524,
      "step": 1960
    },
    {
      "epoch": 31.61,
      "eval_loss": 0.49640557169914246,
      "eval_micro_f1": 0.7906698559583843,
      "eval_runtime": 4.4123,
      "eval_samples_per_second": 189.472,
      "step": 1960
    },
    {
      "epoch": 32.26,
      "learning_rate": 8.9247311827957e-06,
      "loss": 0.18,
      "step": 2000
    },
    {
      "epoch": 32.26,
      "eval_loss": 0.48521941900253296,
      "eval_micro_f1": 0.801435406197606,
      "eval_runtime": 6.7049,
      "eval_samples_per_second": 124.686,
      "step": 2000
    },
    {
      "epoch": 32.9,
      "learning_rate": 8.903225806451614e-06,
      "loss": 0.1374,
      "step": 2040
    },
    {
      "epoch": 32.9,
      "eval_loss": 0.49140119552612305,
      "eval_micro_f1": 0.7894736837095819,
      "eval_runtime": 6.3223,
      "eval_samples_per_second": 132.23,
      "step": 2040
    },
    {
      "epoch": 33.55,
      "learning_rate": 8.881720430107528e-06,
      "loss": 0.1694,
      "step": 2080
    },
    {
      "epoch": 33.55,
      "eval_loss": 0.4927099347114563,
      "eval_micro_f1": 0.7882775114607795,
      "eval_runtime": 3.7769,
      "eval_samples_per_second": 221.347,
      "step": 2080
    },
    {
      "epoch": 34.19,
      "learning_rate": 8.86021505376344e-06,
      "loss": 0.1601,
      "step": 2120
    },
    {
      "epoch": 34.19,
      "eval_loss": 0.4943098723888397,
      "eval_micro_f1": 0.78349282246557,
      "eval_runtime": 4.9398,
      "eval_samples_per_second": 169.237,
      "step": 2120
    },
    {
      "epoch": 34.84,
      "learning_rate": 8.838709677419357e-06,
      "loss": 0.1319,
      "step": 2160
    },
    {
      "epoch": 34.84,
      "eval_loss": 0.4894709587097168,
      "eval_micro_f1": 0.7942583727047915,
      "eval_runtime": 5.4431,
      "eval_samples_per_second": 153.588,
      "step": 2160
    },
    {
      "epoch": 35.48,
      "learning_rate": 8.81720430107527e-06,
      "loss": 0.1273,
      "step": 2200
    },
    {
      "epoch": 35.48,
      "eval_loss": 0.4995502531528473,
      "eval_micro_f1": 0.7858851669631748,
      "eval_runtime": 4.4194,
      "eval_samples_per_second": 189.168,
      "step": 2200
    },
    {
      "epoch": 36.13,
      "learning_rate": 8.795698924731184e-06,
      "loss": 0.1674,
      "step": 2240
    },
    {
      "epoch": 36.13,
      "eval_loss": 0.49648988246917725,
      "eval_micro_f1": 0.7870813392119771,
      "eval_runtime": 5.2767,
      "eval_samples_per_second": 158.432,
      "step": 2240
    },
    {
      "epoch": 36.77,
      "learning_rate": 8.774193548387098e-06,
      "loss": 0.1789,
      "step": 2280
    },
    {
      "epoch": 36.77,
      "eval_loss": 0.5074620842933655,
      "eval_micro_f1": 0.7523923439967076,
      "eval_runtime": 2.5902,
      "eval_samples_per_second": 322.751,
      "step": 2280
    },
    {
      "epoch": 37.42,
      "learning_rate": 8.75268817204301e-06,
      "loss": 0.1752,
      "step": 2320
    },
    {
      "epoch": 37.42,
      "eval_loss": 0.49217045307159424,
      "eval_micro_f1": 0.7870813392119771,
      "eval_runtime": 4.548,
      "eval_samples_per_second": 183.818,
      "step": 2320
    },
    {
      "epoch": 38.06,
      "learning_rate": 8.731182795698927e-06,
      "loss": 0.1476,
      "step": 2360
    },
    {
      "epoch": 38.06,
      "eval_loss": 0.49000826478004456,
      "eval_micro_f1": 0.7942583727047915,
      "eval_runtime": 6.9763,
      "eval_samples_per_second": 119.835,
      "step": 2360
    },
    {
      "epoch": 38.71,
      "learning_rate": 8.70967741935484e-06,
      "loss": 0.1868,
      "step": 2400
    },
    {
      "epoch": 38.71,
      "eval_loss": 0.49217844009399414,
      "eval_micro_f1": 0.7918660282071867,
      "eval_runtime": 3.4731,
      "eval_samples_per_second": 240.707,
      "step": 2400
    },
    {
      "epoch": 39.35,
      "learning_rate": 8.688172043010754e-06,
      "loss": 0.1714,
      "step": 2440
    },
    {
      "epoch": 39.35,
      "eval_loss": 0.49292442202568054,
      "eval_micro_f1": 0.7930622004559891,
      "eval_runtime": 4.4418,
      "eval_samples_per_second": 188.211,
      "step": 2440
    },
    {
      "epoch": 40.0,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.1466,
      "step": 2480
    },
    {
      "epoch": 40.0,
      "eval_loss": 0.4939003586769104,
      "eval_micro_f1": 0.7906698559583843,
      "eval_runtime": 7.0534,
      "eval_samples_per_second": 118.524,
      "step": 2480
    },
    {
      "epoch": 40.65,
      "learning_rate": 8.64516129032258e-06,
      "loss": 0.1461,
      "step": 2520
    },
    {
      "epoch": 40.65,
      "eval_loss": 0.5058996677398682,
      "eval_micro_f1": 0.7870813392119771,
      "eval_runtime": 5.5612,
      "eval_samples_per_second": 150.327,
      "step": 2520
    },
    {
      "epoch": 41.29,
      "learning_rate": 8.623655913978495e-06,
      "loss": 0.1778,
      "step": 2560
    },
    {
      "epoch": 41.29,
      "eval_loss": 0.486485093832016,
      "eval_micro_f1": 0.8026315784464082,
      "eval_runtime": 4.3478,
      "eval_samples_per_second": 192.28,
      "step": 2560
    },
    {
      "epoch": 41.94,
      "learning_rate": 8.602150537634409e-06,
      "loss": 0.1675,
      "step": 2600
    },
    {
      "epoch": 41.94,
      "eval_loss": 0.5116675496101379,
      "eval_micro_f1": 0.7739234444751508,
      "eval_runtime": 2.6063,
      "eval_samples_per_second": 320.761,
      "step": 2600
    },
    {
      "epoch": 42.58,
      "learning_rate": 8.580645161290323e-06,
      "loss": 0.1468,
      "step": 2640
    },
    {
      "epoch": 42.58,
      "eval_loss": 0.49054816365242004,
      "eval_micro_f1": 0.7882775114607795,
      "eval_runtime": 4.4425,
      "eval_samples_per_second": 188.184,
      "step": 2640
    },
    {
      "epoch": 43.23,
      "learning_rate": 8.559139784946238e-06,
      "loss": 0.1738,
      "step": 2680
    },
    {
      "epoch": 43.23,
      "eval_loss": 0.4893950819969177,
      "eval_micro_f1": 0.7870813392119771,
      "eval_runtime": 4.4169,
      "eval_samples_per_second": 189.272,
      "step": 2680
    },
    {
      "epoch": 43.87,
      "learning_rate": 8.537634408602152e-06,
      "loss": 0.1579,
      "step": 2720
    },
    {
      "epoch": 43.87,
      "eval_loss": 0.4949648082256317,
      "eval_micro_f1": 0.7858851669631748,
      "eval_runtime": 5.9011,
      "eval_samples_per_second": 141.669,
      "step": 2720
    },
    {
      "epoch": 44.52,
      "learning_rate": 8.516129032258065e-06,
      "loss": 0.1556,
      "step": 2760
    },
    {
      "epoch": 44.52,
      "eval_loss": 0.48870715498924255,
      "eval_micro_f1": 0.7918660282071867,
      "eval_runtime": 7.1164,
      "eval_samples_per_second": 117.475,
      "step": 2760
    },
    {
      "epoch": 45.16,
      "learning_rate": 8.494623655913979e-06,
      "loss": 0.169,
      "step": 2800
    },
    {
      "epoch": 45.16,
      "eval_loss": 0.4876663088798523,
      "eval_micro_f1": 0.7978468894511987,
      "eval_runtime": 2.5733,
      "eval_samples_per_second": 324.876,
      "step": 2800
    },
    {
      "epoch": 45.81,
      "learning_rate": 8.473118279569893e-06,
      "loss": 0.1659,
      "step": 2840
    },
    {
      "epoch": 45.81,
      "eval_loss": 0.48851579427719116,
      "eval_micro_f1": 0.7954545449535938,
      "eval_runtime": 7.1719,
      "eval_samples_per_second": 116.566,
      "step": 2840
    },
    {
      "epoch": 46.45,
      "learning_rate": 8.451612903225808e-06,
      "loss": 0.1621,
      "step": 2880
    },
    {
      "epoch": 46.45,
      "eval_loss": 0.48815056681632996,
      "eval_micro_f1": 0.7894736837095819,
      "eval_runtime": 5.0889,
      "eval_samples_per_second": 164.28,
      "step": 2880
    },
    {
      "epoch": 47.1,
      "learning_rate": 8.430107526881722e-06,
      "loss": 0.1667,
      "step": 2920
    },
    {
      "epoch": 47.1,
      "eval_loss": 0.49084004759788513,
      "eval_micro_f1": 0.78349282246557,
      "eval_runtime": 2.5855,
      "eval_samples_per_second": 323.336,
      "step": 2920
    },
    {
      "epoch": 47.74,
      "learning_rate": 8.408602150537634e-06,
      "loss": 0.1697,
      "step": 2960
    },
    {
      "epoch": 47.74,
      "eval_loss": 0.4899657368659973,
      "eval_micro_f1": 0.78349282246557,
      "eval_runtime": 7.1881,
      "eval_samples_per_second": 116.303,
      "step": 2960
    },
    {
      "epoch": 48.39,
      "learning_rate": 8.387096774193549e-06,
      "loss": 0.1446,
      "step": 3000
    },
    {
      "epoch": 48.39,
      "eval_loss": 0.47679710388183594,
      "eval_micro_f1": 0.801435406197606,
      "eval_runtime": 2.5746,
      "eval_samples_per_second": 324.715,
      "step": 3000
    },
    {
      "epoch": 49.03,
      "learning_rate": 8.365591397849463e-06,
      "loss": 0.1364,
      "step": 3040
    },
    {
      "epoch": 49.03,
      "eval_loss": 0.4702768921852112,
      "eval_micro_f1": 0.8181818176808394,
      "eval_runtime": 4.5292,
      "eval_samples_per_second": 184.582,
      "step": 3040
    }
  ],
  "max_steps": 18600,
  "num_train_epochs": 300,
  "total_flos": 4154829689883924,
  "trial_name": null,
  "trial_params": null
}