diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3930 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 10600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 1.5824520587921143, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.9315, + "step": 106 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.012987012987012988, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.25, + "eval_PERSON_recall": 0.006666666666666667, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.0, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.0, + "eval_TIME_recall": 0.0, + "eval_loss": 0.6119179725646973, + "eval_overall_accuracy": 0.8370662072360325, + "eval_overall_f1": 0.005633802816901409, + "eval_overall_precision": 0.25, + "eval_overall_recall": 0.002849002849002849, + "eval_runtime": 0.3369, + "eval_samples_per_second": 555.024, + "eval_steps_per_second": 8.904, + "step": 106 + }, + { + "epoch": 2.0, + "grad_norm": 1.5226932764053345, + "learning_rate": 4.9e-05, + "loss": 0.5353, + "step": 212 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.40983606557377056, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.5102040816326531, + "eval_LOCATION_recall": 0.3424657534246575, + "eval_ORGANIZATION_f1": 0.08695652173913043, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.1, + "eval_ORGANIZATION_recall": 0.07692307692307693, + "eval_PERSON_f1": 0.574468085106383, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.4778761061946903, + "eval_PERSON_recall": 0.72, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.5483870967741935, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.6071428571428571, + "eval_TIME_recall": 0.5, + "eval_loss": 0.411459743976593, + "eval_overall_accuracy": 0.8924440068914595, + "eval_overall_f1": 0.4397163120567376, + "eval_overall_precision": 0.4378531073446328, + "eval_overall_recall": 0.4415954415954416, + "eval_runtime": 0.3252, + "eval_samples_per_second": 574.99, + "eval_steps_per_second": 9.224, + "step": 212 + }, + { + "epoch": 3.0, + "grad_norm": 1.2673431634902954, + "learning_rate": 4.85e-05, + "loss": 0.3686, + "step": 318 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.5747126436781609, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.49504950495049505, + "eval_LOCATION_recall": 0.684931506849315, + "eval_ORGANIZATION_f1": 0.45528455284552843, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.4827586206896552, + "eval_ORGANIZATION_recall": 0.4307692307692308, + "eval_PERSON_f1": 0.7507163323782233, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.6582914572864321, + "eval_PERSON_recall": 0.8733333333333333, + "eval_QUANTITY_f1": 0.3934426229508197, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.375, + "eval_QUANTITY_recall": 0.41379310344827586, + "eval_TIME_f1": 0.8115942028985507, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8235294117647058, + "eval_loss": 0.29026341438293457, + "eval_overall_accuracy": 0.9288702928870293, + "eval_overall_f1": 0.6417525773195876, + "eval_overall_precision": 0.5858823529411765, + "eval_overall_recall": 0.7094017094017094, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.209, + "eval_steps_per_second": 9.019, + "step": 318 + }, + { + "epoch": 4.0, + "grad_norm": 1.4100310802459717, + "learning_rate": 4.8e-05, + "loss": 0.2668, + "step": 424 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.5664739884393063, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.49, + "eval_LOCATION_recall": 0.6712328767123288, + "eval_ORGANIZATION_f1": 0.4878048780487805, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5172413793103449, + "eval_ORGANIZATION_recall": 0.46153846153846156, + "eval_PERSON_f1": 0.8246153846153845, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7657142857142857, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.5245901639344263, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.5, + "eval_QUANTITY_recall": 0.5517241379310345, + "eval_TIME_f1": 0.8823529411764706, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.8823529411764706, + "eval_TIME_recall": 0.8823529411764706, + "eval_loss": 0.207948699593544, + "eval_overall_accuracy": 0.9401919763721388, + "eval_overall_f1": 0.6906666666666667, + "eval_overall_precision": 0.6491228070175439, + "eval_overall_recall": 0.7378917378917379, + "eval_runtime": 0.3333, + "eval_samples_per_second": 560.99, + "eval_steps_per_second": 9.0, + "step": 424 + }, + { + "epoch": 5.0, + "grad_norm": 2.0687103271484375, + "learning_rate": 4.75e-05, + "loss": 0.2091, + "step": 530 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.7195121951219512, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.6483516483516484, + "eval_LOCATION_recall": 0.8082191780821918, + "eval_ORGANIZATION_f1": 0.5714285714285715, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5588235294117647, + "eval_ORGANIZATION_recall": 0.5846153846153846, + "eval_PERSON_f1": 0.8369230769230769, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7771428571428571, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.5846153846153845, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.5277777777777778, + "eval_QUANTITY_recall": 0.6551724137931034, + "eval_TIME_f1": 0.8333333333333333, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7894736842105263, + "eval_TIME_recall": 0.8823529411764706, + "eval_loss": 0.17728105187416077, + "eval_overall_accuracy": 0.9451144474526212, + "eval_overall_f1": 0.7430830039525692, + "eval_overall_precision": 0.6911764705882353, + "eval_overall_recall": 0.8034188034188035, + "eval_runtime": 0.3284, + "eval_samples_per_second": 569.476, + "eval_steps_per_second": 9.136, + "step": 530 + }, + { + "epoch": 6.0, + "grad_norm": 2.4117839336395264, + "learning_rate": 4.7e-05, + "loss": 0.1833, + "step": 636 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.6971428571428572, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.5980392156862745, + "eval_LOCATION_recall": 0.8356164383561644, + "eval_ORGANIZATION_f1": 0.5818181818181819, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.48, + "eval_ORGANIZATION_recall": 0.7384615384615385, + "eval_PERSON_f1": 0.8353658536585366, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7696629213483146, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.5555555555555556, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.46511627906976744, + "eval_QUANTITY_recall": 0.6896551724137931, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.189751997590065, + "eval_overall_accuracy": 0.9424070883583558, + "eval_overall_f1": 0.7248157248157246, + "eval_overall_precision": 0.6371490280777538, + "eval_overall_recall": 0.8404558404558404, + "eval_runtime": 0.3281, + "eval_samples_per_second": 569.94, + "eval_steps_per_second": 9.143, + "step": 636 + }, + { + "epoch": 7.0, + "grad_norm": 1.2822880744934082, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.1653, + "step": 742 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.7607361963190183, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.6888888888888889, + "eval_LOCATION_recall": 0.8493150684931506, + "eval_ORGANIZATION_f1": 0.5857142857142857, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5466666666666666, + "eval_ORGANIZATION_recall": 0.6307692307692307, + "eval_PERSON_f1": 0.8616352201257862, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8154761904761905, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.6666666666666667, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.5945945945945946, + "eval_QUANTITY_recall": 0.7586206896551724, + "eval_TIME_f1": 0.8450704225352113, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.8108108108108109, + "eval_TIME_recall": 0.8823529411764706, + "eval_loss": 0.15423187613487244, + "eval_overall_accuracy": 0.9512675363032242, + "eval_overall_f1": 0.770448548812665, + "eval_overall_precision": 0.7174447174447175, + "eval_overall_recall": 0.8319088319088319, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.152, + "eval_steps_per_second": 8.986, + "step": 742 + }, + { + "epoch": 8.0, + "grad_norm": 1.469247817993164, + "learning_rate": 4.600000000000001e-05, + "loss": 0.1529, + "step": 848 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.7590361445783131, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.6774193548387096, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.5915492957746479, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5454545454545454, + "eval_ORGANIZATION_recall": 0.6461538461538462, + "eval_PERSON_f1": 0.8500000000000001, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7619047619047619, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.861111111111111, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.8157894736842105, + "eval_TIME_recall": 0.9117647058823529, + "eval_loss": 0.14763768017292023, + "eval_overall_accuracy": 0.9527442776273689, + "eval_overall_f1": 0.7758846657929228, + "eval_overall_precision": 0.7184466019417476, + "eval_overall_recall": 0.8433048433048433, + "eval_runtime": 0.3346, + "eval_samples_per_second": 558.898, + "eval_steps_per_second": 8.966, + "step": 848 + }, + { + "epoch": 9.0, + "grad_norm": 1.5628341436386108, + "learning_rate": 4.55e-05, + "loss": 0.1428, + "step": 954 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.7544910179640719, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.6702127659574468, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.6225165562913907, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5465116279069767, + "eval_ORGANIZATION_recall": 0.7230769230769231, + "eval_PERSON_f1": 0.879746835443038, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8373493975903614, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.6666666666666666, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.575, + "eval_QUANTITY_recall": 0.7931034482758621, + "eval_TIME_f1": 0.8266666666666665, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7560975609756098, + "eval_TIME_recall": 0.9117647058823529, + "eval_loss": 0.14981509745121002, + "eval_overall_accuracy": 0.9497907949790795, + "eval_overall_f1": 0.7789203084832904, + "eval_overall_precision": 0.7096018735362998, + "eval_overall_recall": 0.8632478632478633, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.163, + "eval_steps_per_second": 8.987, + "step": 954 + }, + { + "epoch": 10.0, + "grad_norm": 1.2276618480682373, + "learning_rate": 4.5e-05, + "loss": 0.132, + "step": 1060 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.779874213836478, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7209302325581395, + "eval_LOCATION_recall": 0.8493150684931506, + "eval_ORGANIZATION_f1": 0.5925925925925927, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5714285714285714, + "eval_ORGANIZATION_recall": 0.6153846153846154, + "eval_PERSON_f1": 0.8616352201257862, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8154761904761905, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8108108108108107, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8823529411764706, + "eval_loss": 0.14296573400497437, + "eval_overall_accuracy": 0.9532365247354172, + "eval_overall_f1": 0.7808764940239045, + "eval_overall_precision": 0.7313432835820896, + "eval_overall_recall": 0.8376068376068376, + "eval_runtime": 0.3274, + "eval_samples_per_second": 571.224, + "eval_steps_per_second": 9.164, + "step": 1060 + }, + { + "epoch": 11.0, + "grad_norm": 1.8262553215026855, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.1288, + "step": 1166 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.7515151515151515, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.6739130434782609, + "eval_LOCATION_recall": 0.8493150684931506, + "eval_ORGANIZATION_f1": 0.6330935251798562, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5945945945945946, + "eval_ORGANIZATION_recall": 0.676923076923077, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.6956521739130435, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7631578947368423, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.6904761904761905, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1460973471403122, + "eval_overall_accuracy": 0.9515136598572483, + "eval_overall_f1": 0.774869109947644, + "eval_overall_precision": 0.7167070217917676, + "eval_overall_recall": 0.8433048433048433, + "eval_runtime": 0.3342, + "eval_samples_per_second": 559.473, + "eval_steps_per_second": 8.976, + "step": 1166 + }, + { + "epoch": 12.0, + "grad_norm": 1.1676838397979736, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.1219, + "step": 1272 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.7590361445783131, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.6774193548387096, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.6433566433566433, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5897435897435898, + "eval_ORGANIZATION_recall": 0.7076923076923077, + "eval_PERSON_f1": 0.8734177215189873, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8313253012048193, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7058823529411765, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6153846153846154, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7631578947368423, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.6904761904761905, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14986908435821533, + "eval_overall_accuracy": 0.9502830420871278, + "eval_overall_f1": 0.7802340702210663, + "eval_overall_precision": 0.7177033492822966, + "eval_overall_recall": 0.8547008547008547, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.614, + "eval_steps_per_second": 9.01, + "step": 1272 + }, + { + "epoch": 13.0, + "grad_norm": 1.25841224193573, + "learning_rate": 4.35e-05, + "loss": 0.1176, + "step": 1378 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.7820512820512822, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8356164383561644, + "eval_ORGANIZATION_f1": 0.6666666666666666, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5975609756097561, + "eval_ORGANIZATION_recall": 0.7538461538461538, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13805125653743744, + "eval_overall_accuracy": 0.9547132660595619, + "eval_overall_f1": 0.7968337730870712, + "eval_overall_precision": 0.742014742014742, + "eval_overall_recall": 0.8603988603988604, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.243, + "eval_steps_per_second": 9.02, + "step": 1378 + }, + { + "epoch": 14.0, + "grad_norm": 1.1619478464126587, + "learning_rate": 4.3e-05, + "loss": 0.1161, + "step": 1484 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.782608695652174, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7159090909090909, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.6666666666666666, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5975609756097561, + "eval_ORGANIZATION_recall": 0.7538461538461538, + "eval_PERSON_f1": 0.8761904761904761, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8363636363636363, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7536231884057972, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.65, + "eval_QUANTITY_recall": 0.896551724137931, + "eval_TIME_f1": 0.7631578947368423, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.6904761904761905, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14077436923980713, + "eval_overall_accuracy": 0.9532365247354172, + "eval_overall_f1": 0.7942708333333333, + "eval_overall_precision": 0.7314148681055156, + "eval_overall_recall": 0.8689458689458689, + "eval_runtime": 0.3264, + "eval_samples_per_second": 572.885, + "eval_steps_per_second": 9.191, + "step": 1484 + }, + { + "epoch": 15.0, + "grad_norm": 1.547819972038269, + "learning_rate": 4.25e-05, + "loss": 0.1148, + "step": 1590 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.7777777777777778, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7078651685393258, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7058823529411765, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6136363636363636, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8726114649681529, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7246376811594203, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.625, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7733333333333334, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7073170731707317, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1482475847005844, + "eval_overall_accuracy": 0.9537287718434654, + "eval_overall_f1": 0.7968952134540751, + "eval_overall_precision": 0.7298578199052133, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.301, + "eval_steps_per_second": 9.165, + "step": 1590 + }, + { + "epoch": 16.0, + "grad_norm": 3.2295055389404297, + "learning_rate": 4.2e-05, + "loss": 0.1075, + "step": 1696 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.8076923076923077, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7590361445783133, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.6268656716417911, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6086956521739131, + "eval_ORGANIZATION_recall": 0.6461538461538462, + "eval_PERSON_f1": 0.8753993610223643, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8404907975460123, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7142857142857144, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6097560975609756, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7733333333333334, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7073170731707317, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14014115929603577, + "eval_overall_accuracy": 0.9554516367216342, + "eval_overall_f1": 0.7914438502673797, + "eval_overall_precision": 0.7455919395465995, + "eval_overall_recall": 0.8433048433048433, + "eval_runtime": 0.3353, + "eval_samples_per_second": 557.692, + "eval_steps_per_second": 8.947, + "step": 1696 + }, + { + "epoch": 17.0, + "grad_norm": 0.9571211934089661, + "learning_rate": 4.15e-05, + "loss": 0.1058, + "step": 1802 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.7875, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7199999999999999, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6352941176470588, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8761904761904761, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8363636363636363, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7575757575757576, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7631578947368423, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.6904761904761905, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14074338972568512, + "eval_overall_accuracy": 0.9542210189515137, + "eval_overall_f1": 0.8057366362451107, + "eval_overall_precision": 0.7427884615384616, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3268, + "eval_samples_per_second": 572.193, + "eval_steps_per_second": 9.18, + "step": 1802 + }, + { + "epoch": 18.0, + "grad_norm": 1.2484526634216309, + "learning_rate": 4.1e-05, + "loss": 0.1021, + "step": 1908 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.8076923076923077, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7590361445783133, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.6433566433566433, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5897435897435898, + "eval_ORGANIZATION_recall": 0.7076923076923077, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.6956521739130435, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13723434507846832, + "eval_overall_accuracy": 0.9561900073837066, + "eval_overall_f1": 0.7936507936507937, + "eval_overall_precision": 0.7407407407407407, + "eval_overall_recall": 0.8547008547008547, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.38, + "eval_steps_per_second": 9.167, + "step": 1908 + }, + { + "epoch": 19.0, + "grad_norm": 0.977418839931488, + "learning_rate": 4.05e-05, + "loss": 0.1032, + "step": 2014 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.8026315789473685, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7721518987341772, + "eval_LOCATION_recall": 0.8356164383561644, + "eval_ORGANIZATION_f1": 0.7066666666666667, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6235294117647059, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7812500000000001, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7733333333333334, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7073170731707317, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13176140189170837, + "eval_overall_accuracy": 0.958897366477972, + "eval_overall_f1": 0.8116710875331565, + "eval_overall_precision": 0.7593052109181141, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3289, + "eval_samples_per_second": 568.639, + "eval_steps_per_second": 9.123, + "step": 2014 + }, + { + "epoch": 20.0, + "grad_norm": 1.2055819034576416, + "learning_rate": 4e-05, + "loss": 0.0979, + "step": 2120 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.7974683544303799, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7411764705882353, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7412587412587412, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6794871794871795, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1400451809167862, + "eval_overall_accuracy": 0.956928378045779, + "eval_overall_f1": 0.8132450331125828, + "eval_overall_precision": 0.7599009900990099, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3266, + "eval_samples_per_second": 572.647, + "eval_steps_per_second": 9.187, + "step": 2120 + }, + { + "epoch": 21.0, + "grad_norm": 2.198835611343384, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.098, + "step": 2226 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.8181818181818182, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7777777777777778, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.725925925925926, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7, + "eval_ORGANIZATION_recall": 0.7538461538461538, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13511410355567932, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.8194070080862534, + "eval_overall_precision": 0.7774936061381074, + "eval_overall_recall": 0.8660968660968661, + "eval_runtime": 0.3268, + "eval_samples_per_second": 572.204, + "eval_steps_per_second": 9.18, + "step": 2226 + }, + { + "epoch": 22.0, + "grad_norm": 3.7243123054504395, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.0942, + "step": 2332 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.7820512820512822, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8356164383561644, + "eval_ORGANIZATION_f1": 0.7027027027027027, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6265060240963856, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8509316770186336, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1375378519296646, + "eval_overall_accuracy": 0.9574206251538272, + "eval_overall_f1": 0.8085106382978723, + "eval_overall_precision": 0.7581047381546134, + "eval_overall_recall": 0.8660968660968661, + "eval_runtime": 0.3271, + "eval_samples_per_second": 571.696, + "eval_steps_per_second": 9.172, + "step": 2332 + }, + { + "epoch": 23.0, + "grad_norm": 1.6769466400146484, + "learning_rate": 3.85e-05, + "loss": 0.0943, + "step": 2438 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.8104575163398693, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.775, + "eval_LOCATION_recall": 0.8493150684931506, + "eval_ORGANIZATION_f1": 0.76, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6705882352941176, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13511058688163757, + "eval_overall_accuracy": 0.9591434900319961, + "eval_overall_f1": 0.8249336870026525, + "eval_overall_precision": 0.771712158808933, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.272, + "eval_steps_per_second": 9.165, + "step": 2438 + }, + { + "epoch": 24.0, + "grad_norm": 1.9959501028060913, + "learning_rate": 3.8e-05, + "loss": 0.091, + "step": 2544 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.8050314465408805, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7441860465116279, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7682119205298013, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6744186046511628, + "eval_ORGANIZATION_recall": 0.8923076923076924, + "eval_PERSON_f1": 0.8753993610223643, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8404907975460123, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14082586765289307, + "eval_overall_accuracy": 0.9579128722618755, + "eval_overall_f1": 0.8183006535947712, + "eval_overall_precision": 0.7560386473429952, + "eval_overall_recall": 0.8917378917378918, + "eval_runtime": 0.3276, + "eval_samples_per_second": 570.878, + "eval_steps_per_second": 9.158, + "step": 2544 + }, + { + "epoch": 25.0, + "grad_norm": 2.225444793701172, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.089, + "step": 2650 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.8076923076923077, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7590361445783133, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7464788732394366, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6883116883116883, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8509316770186336, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7246376811594203, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.625, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1414780169725418, + "eval_overall_accuracy": 0.9584051193699237, + "eval_overall_f1": 0.8186666666666667, + "eval_overall_precision": 0.7694235588972431, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3266, + "eval_samples_per_second": 572.607, + "eval_steps_per_second": 9.186, + "step": 2650 + }, + { + "epoch": 26.0, + "grad_norm": 1.2359886169433594, + "learning_rate": 3.7e-05, + "loss": 0.0862, + "step": 2756 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.7898089171974523, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7380952380952381, + "eval_LOCATION_recall": 0.8493150684931506, + "eval_ORGANIZATION_f1": 0.7194244604316546, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6756756756756757, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7142857142857144, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6097560975609756, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7733333333333334, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7073170731707317, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14282022416591644, + "eval_overall_accuracy": 0.9561900073837066, + "eval_overall_f1": 0.8052980132450331, + "eval_overall_precision": 0.7524752475247525, + "eval_overall_recall": 0.8660968660968661, + "eval_runtime": 0.3275, + "eval_samples_per_second": 570.999, + "eval_steps_per_second": 9.16, + "step": 2756 + }, + { + "epoch": 27.0, + "grad_norm": 1.0968230962753296, + "learning_rate": 3.65e-05, + "loss": 0.0836, + "step": 2862 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.7950310559006212, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7272727272727273, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7323943661971831, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6753246753246753, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.676056338028169, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.5714285714285714, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7733333333333334, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7073170731707317, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14485140144824982, + "eval_overall_accuracy": 0.9556977602756583, + "eval_overall_f1": 0.8068331143232589, + "eval_overall_precision": 0.748780487804878, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3284, + "eval_samples_per_second": 569.433, + "eval_steps_per_second": 9.135, + "step": 2862 + }, + { + "epoch": 28.0, + "grad_norm": 1.7558865547180176, + "learning_rate": 3.6e-05, + "loss": 0.0801, + "step": 2968 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.8104575163398693, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.775, + "eval_LOCATION_recall": 0.8493150684931506, + "eval_ORGANIZATION_f1": 0.7338129496402878, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6891891891891891, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8774193548387097, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.85, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13724297285079956, + "eval_overall_accuracy": 0.9579128722618755, + "eval_overall_f1": 0.8156123822341856, + "eval_overall_precision": 0.7729591836734694, + "eval_overall_recall": 0.8632478632478633, + "eval_runtime": 0.3346, + "eval_samples_per_second": 558.811, + "eval_steps_per_second": 8.965, + "step": 2968 + }, + { + "epoch": 29.0, + "grad_norm": 1.0294597148895264, + "learning_rate": 3.55e-05, + "loss": 0.0779, + "step": 3074 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.7974683544303799, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7411764705882353, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7682119205298013, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6744186046511628, + "eval_ORGANIZATION_recall": 0.8923076923076924, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1392383724451065, + "eval_overall_accuracy": 0.958897366477972, + "eval_overall_f1": 0.8236842105263158, + "eval_overall_precision": 0.7652811735941321, + "eval_overall_recall": 0.8917378917378918, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.248, + "eval_steps_per_second": 8.988, + "step": 3074 + }, + { + "epoch": 30.0, + "grad_norm": 1.9617377519607544, + "learning_rate": 3.5e-05, + "loss": 0.0776, + "step": 3180 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.725925925925926, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7, + "eval_ORGANIZATION_recall": 0.7538461538461538, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8475609756097561, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13599379360675812, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.8236877523553163, + "eval_overall_precision": 0.7806122448979592, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.281, + "eval_steps_per_second": 9.037, + "step": 3180 + }, + { + "epoch": 31.0, + "grad_norm": 0.6533139944076538, + "learning_rate": 3.45e-05, + "loss": 0.0764, + "step": 3286 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.8101265822784809, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7529411764705882, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7586206896551724, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7246376811594203, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.625, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7733333333333334, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7073170731707317, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14058609306812286, + "eval_overall_accuracy": 0.9598818606940684, + "eval_overall_f1": 0.8173455978975033, + "eval_overall_precision": 0.7585365853658537, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.3334, + "eval_samples_per_second": 560.883, + "eval_steps_per_second": 8.998, + "step": 3286 + }, + { + "epoch": 32.0, + "grad_norm": 0.820826530456543, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.0745, + "step": 3392 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.7974683544303799, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7411764705882353, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7567567567567568, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6746987951807228, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.6857142857142856, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.5853658536585366, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8169014084507041, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7837837837837838, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1464959979057312, + "eval_overall_accuracy": 0.9576667487078513, + "eval_overall_f1": 0.8157894736842105, + "eval_overall_precision": 0.7579462102689487, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3329, + "eval_samples_per_second": 561.647, + "eval_steps_per_second": 9.01, + "step": 3392 + }, + { + "epoch": 33.0, + "grad_norm": 0.9249401688575745, + "learning_rate": 3.35e-05, + "loss": 0.0729, + "step": 3498 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.72992700729927, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6944444444444444, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8782051282051282, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.845679012345679, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7936507936507937, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13697849214076996, + "eval_overall_accuracy": 0.9606202313561408, + "eval_overall_f1": 0.8243243243243243, + "eval_overall_precision": 0.7840616966580977, + "eval_overall_recall": 0.8689458689458689, + "eval_runtime": 0.3343, + "eval_samples_per_second": 559.37, + "eval_steps_per_second": 8.974, + "step": 3498 + }, + { + "epoch": 34.0, + "grad_norm": 3.431596040725708, + "learning_rate": 3.3e-05, + "loss": 0.0719, + "step": 3604 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.8311688311688312, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7901234567901234, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.6814814814814816, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7076923076923077, + "eval_PERSON_f1": 0.8838709677419355, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7812500000000001, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1357881724834442, + "eval_overall_accuracy": 0.9616047255722373, + "eval_overall_f1": 0.819047619047619, + "eval_overall_precision": 0.7838541666666666, + "eval_overall_recall": 0.8575498575498576, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.136, + "eval_steps_per_second": 9.018, + "step": 3604 + }, + { + "epoch": 35.0, + "grad_norm": 0.9399017691612244, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0746, + "step": 3710 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.8050314465408805, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7441860465116279, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7432432432432433, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6626506024096386, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7733333333333334, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7073170731707317, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14653536677360535, + "eval_overall_accuracy": 0.9574206251538272, + "eval_overall_f1": 0.8141361256544503, + "eval_overall_precision": 0.7530266343825666, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.886, + "eval_steps_per_second": 9.014, + "step": 3710 + }, + { + "epoch": 36.0, + "grad_norm": 0.6335604190826416, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0676, + "step": 3816 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7464788732394366, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6883116883116883, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13876868784427643, + "eval_overall_accuracy": 0.9603741078021166, + "eval_overall_f1": 0.8251001335113485, + "eval_overall_precision": 0.7763819095477387, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.873, + "eval_steps_per_second": 9.014, + "step": 3816 + }, + { + "epoch": 37.0, + "grad_norm": 1.0286895036697388, + "learning_rate": 3.15e-05, + "loss": 0.0674, + "step": 3922 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.8076923076923077, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7590361445783133, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7432432432432433, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6626506024096386, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8761904761904761, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8363636363636363, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14836791157722473, + "eval_overall_accuracy": 0.9586512429239479, + "eval_overall_f1": 0.8131578947368421, + "eval_overall_precision": 0.7555012224938875, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.202, + "eval_steps_per_second": 8.987, + "step": 3922 + }, + { + "epoch": 38.0, + "grad_norm": 0.754433274269104, + "learning_rate": 3.1e-05, + "loss": 0.0653, + "step": 4028 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.6911764705882353, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6619718309859155, + "eval_ORGANIZATION_recall": 0.7230769230769231, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1401306837797165, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.8096514745308312, + "eval_overall_precision": 0.7645569620253164, + "eval_overall_recall": 0.8603988603988604, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.239, + "eval_steps_per_second": 9.02, + "step": 4028 + }, + { + "epoch": 39.0, + "grad_norm": 1.2110971212387085, + "learning_rate": 3.05e-05, + "loss": 0.0638, + "step": 4134 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.8104575163398693, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.775, + "eval_LOCATION_recall": 0.8493150684931506, + "eval_ORGANIZATION_f1": 0.7534246575342466, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6790123456790124, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7575757575757576, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1372911036014557, + "eval_overall_accuracy": 0.9608663549101649, + "eval_overall_f1": 0.8229027962716379, + "eval_overall_precision": 0.7725, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.249, + "eval_steps_per_second": 9.036, + "step": 4134 + }, + { + "epoch": 40.0, + "grad_norm": 0.7484323382377625, + "learning_rate": 3e-05, + "loss": 0.0641, + "step": 4240 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.7875, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7567567567567568, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6746987951807228, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.879746835443038, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8373493975903614, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.6857142857142856, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.5853658536585366, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15354810655117035, + "eval_overall_accuracy": 0.9566822544917548, + "eval_overall_f1": 0.8098958333333335, + "eval_overall_precision": 0.7458033573141487, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.3264, + "eval_samples_per_second": 572.882, + "eval_steps_per_second": 9.191, + "step": 4240 + }, + { + "epoch": 41.0, + "grad_norm": 0.8354088068008423, + "learning_rate": 2.95e-05, + "loss": 0.0632, + "step": 4346 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7801418439716312, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7236842105263158, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7575757575757576, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.13777735829353333, + "eval_overall_accuracy": 0.9613586020182131, + "eval_overall_f1": 0.8326639892904953, + "eval_overall_precision": 0.7853535353535354, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.553, + "eval_steps_per_second": 9.009, + "step": 4346 + }, + { + "epoch": 42.0, + "grad_norm": 0.5027186274528503, + "learning_rate": 2.9e-05, + "loss": 0.0607, + "step": 4452 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.8104575163398693, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.775, + "eval_LOCATION_recall": 0.8493150684931506, + "eval_ORGANIZATION_f1": 0.7464788732394366, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6883116883116883, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8782051282051282, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.845679012345679, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1403217911720276, + "eval_overall_accuracy": 0.9606202313561408, + "eval_overall_f1": 0.8192771084337349, + "eval_overall_precision": 0.7727272727272727, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3336, + "eval_samples_per_second": 560.597, + "eval_steps_per_second": 8.994, + "step": 4452 + }, + { + "epoch": 43.0, + "grad_norm": 0.9568884968757629, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.0596, + "step": 4558 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.8311688311688312, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7901234567901234, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7323943661971831, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6753246753246753, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8838709677419355, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1430799663066864, + "eval_overall_accuracy": 0.9616047255722373, + "eval_overall_f1": 0.8230563002680965, + "eval_overall_precision": 0.7772151898734178, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3331, + "eval_samples_per_second": 561.433, + "eval_steps_per_second": 9.007, + "step": 4558 + }, + { + "epoch": 44.0, + "grad_norm": 5.6017351150512695, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0582, + "step": 4664 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7375886524822695, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6842105263157895, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8475609756097561, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.7837837837837837, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.725, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14417453110218048, + "eval_overall_accuracy": 0.961112478464189, + "eval_overall_f1": 0.8229027962716379, + "eval_overall_precision": 0.7725, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3257, + "eval_samples_per_second": 574.07, + "eval_steps_per_second": 9.21, + "step": 4664 + }, + { + "epoch": 45.0, + "grad_norm": 2.356386184692383, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0606, + "step": 4770 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.763888888888889, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6962025316455697, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.879746835443038, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8373493975903614, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7246376811594203, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.625, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14679183065891266, + "eval_overall_accuracy": 0.9596357371400444, + "eval_overall_f1": 0.8232189973614775, + "eval_overall_precision": 0.7665847665847666, + "eval_overall_recall": 0.8888888888888888, + "eval_runtime": 0.3259, + "eval_samples_per_second": 573.862, + "eval_steps_per_second": 9.206, + "step": 4770 + }, + { + "epoch": 46.0, + "grad_norm": 0.9929021000862122, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.0563, + "step": 4876 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.8311688311688312, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7901234567901234, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7285714285714285, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.68, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14097212255001068, + "eval_overall_accuracy": 0.9608663549101649, + "eval_overall_f1": 0.8219544846050869, + "eval_overall_precision": 0.7752525252525253, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.632, + "eval_steps_per_second": 9.01, + "step": 4876 + }, + { + "epoch": 47.0, + "grad_norm": 0.8276335597038269, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0556, + "step": 4982 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.7974683544303799, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7411764705882353, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7724137931034483, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.879746835443038, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8373493975903614, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.6857142857142856, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.5853658536585366, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15497690439224243, + "eval_overall_accuracy": 0.9579128722618755, + "eval_overall_f1": 0.8173455978975033, + "eval_overall_precision": 0.7585365853658537, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.3319, + "eval_samples_per_second": 563.4, + "eval_steps_per_second": 9.039, + "step": 4982 + }, + { + "epoch": 48.0, + "grad_norm": 2.3347344398498535, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.0578, + "step": 5088 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7534246575342466, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6790123456790124, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8825396825396825, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.6956521739130435, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.7945205479452054, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7435897435897436, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1532071828842163, + "eval_overall_accuracy": 0.9576667487078513, + "eval_overall_f1": 0.8184210526315789, + "eval_overall_precision": 0.7603911980440098, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.241, + "eval_steps_per_second": 9.036, + "step": 5088 + }, + { + "epoch": 49.0, + "grad_norm": 0.38269466161727905, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.0559, + "step": 5194 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.8311688311688312, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7901234567901234, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7428571428571429, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6933333333333334, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8825396825396825, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14444759488105774, + "eval_overall_accuracy": 0.9618508491262614, + "eval_overall_f1": 0.8262032085561498, + "eval_overall_precision": 0.7783375314861462, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.256, + "eval_steps_per_second": 9.036, + "step": 5194 + }, + { + "epoch": 50.0, + "grad_norm": 1.5551215410232544, + "learning_rate": 2.5e-05, + "loss": 0.0544, + "step": 5300 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.757142857142857, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7066666666666667, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8475609756097561, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1467483639717102, + "eval_overall_accuracy": 0.9608663549101649, + "eval_overall_f1": 0.8266666666666667, + "eval_overall_precision": 0.7769423558897243, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.888, + "eval_steps_per_second": 9.014, + "step": 5300 + }, + { + "epoch": 51.0, + "grad_norm": 1.1728203296661377, + "learning_rate": 2.45e-05, + "loss": 0.0511, + "step": 5406 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7246376811594203, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.684931506849315, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8475609756097561, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14386440813541412, + "eval_overall_accuracy": 0.9608663549101649, + "eval_overall_f1": 0.8219544846050869, + "eval_overall_precision": 0.7752525252525253, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3262, + "eval_samples_per_second": 573.233, + "eval_steps_per_second": 9.196, + "step": 5406 + }, + { + "epoch": 52.0, + "grad_norm": 1.3139925003051758, + "learning_rate": 2.4e-05, + "loss": 0.0516, + "step": 5512 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7567567567567568, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6746987951807228, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8734177215189873, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8313253012048193, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7575757575757576, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15267647802829742, + "eval_overall_accuracy": 0.9596357371400444, + "eval_overall_f1": 0.8221343873517786, + "eval_overall_precision": 0.7647058823529411, + "eval_overall_recall": 0.8888888888888888, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.953, + "eval_steps_per_second": 9.015, + "step": 5512 + }, + { + "epoch": 53.0, + "grad_norm": 1.7045568227767944, + "learning_rate": 2.35e-05, + "loss": 0.0524, + "step": 5618 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6835443037974683, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8742138364779874, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8273809523809523, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7246376811594203, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.625, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15286701917648315, + "eval_overall_accuracy": 0.9598818606940684, + "eval_overall_f1": 0.8194993412384717, + "eval_overall_precision": 0.7622549019607843, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.3345, + "eval_samples_per_second": 559.086, + "eval_steps_per_second": 8.969, + "step": 5618 + }, + { + "epoch": 54.0, + "grad_norm": 2.6849210262298584, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0513, + "step": 5724 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7450980392156863, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6477272727272727, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7058823529411765, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6153846153846154, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15772588551044464, + "eval_overall_accuracy": 0.9576667487078513, + "eval_overall_f1": 0.817824377457405, + "eval_overall_precision": 0.7572815533980582, + "eval_overall_recall": 0.8888888888888888, + "eval_runtime": 0.3319, + "eval_samples_per_second": 563.4, + "eval_steps_per_second": 9.039, + "step": 5724 + }, + { + "epoch": 55.0, + "grad_norm": 0.8173909783363342, + "learning_rate": 2.25e-05, + "loss": 0.0506, + "step": 5830 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.8235294117647058, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7428571428571429, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6933333333333334, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8825396825396825, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15098540484905243, + "eval_overall_accuracy": 0.9603741078021166, + "eval_overall_f1": 0.8219544846050869, + "eval_overall_precision": 0.7752525252525253, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3334, + "eval_samples_per_second": 560.961, + "eval_steps_per_second": 8.999, + "step": 5830 + }, + { + "epoch": 56.0, + "grad_norm": 1.8647905588150024, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0511, + "step": 5936 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7517730496453899, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6973684210526315, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8825396825396825, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14845135807991028, + "eval_overall_accuracy": 0.9606202313561408, + "eval_overall_f1": 0.8255659121171771, + "eval_overall_precision": 0.775, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3279, + "eval_samples_per_second": 570.365, + "eval_steps_per_second": 9.15, + "step": 5936 + }, + { + "epoch": 57.0, + "grad_norm": 0.22929580509662628, + "learning_rate": 2.15e-05, + "loss": 0.0494, + "step": 6042 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.8181818181818182, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7777777777777778, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7285714285714285, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.68, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8475609756097561, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8169014084507041, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7837837837837838, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14559510350227356, + "eval_overall_accuracy": 0.9618508491262614, + "eval_overall_f1": 0.8230563002680965, + "eval_overall_precision": 0.7772151898734178, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3268, + "eval_samples_per_second": 572.194, + "eval_steps_per_second": 9.18, + "step": 6042 + }, + { + "epoch": 58.0, + "grad_norm": 0.6386439800262451, + "learning_rate": 2.1e-05, + "loss": 0.0494, + "step": 6148 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.8101265822784809, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7529411764705882, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7391304347826088, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6986301369863014, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15255826711654663, + "eval_overall_accuracy": 0.9603741078021166, + "eval_overall_f1": 0.8181818181818181, + "eval_overall_precision": 0.7707808564231738, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.613, + "eval_steps_per_second": 9.026, + "step": 6148 + }, + { + "epoch": 59.0, + "grad_norm": 19.432247161865234, + "learning_rate": 2.05e-05, + "loss": 0.0478, + "step": 6254 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7432432432432433, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6626506024096386, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.6956521739130435, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1611391305923462, + "eval_overall_accuracy": 0.9586512429239479, + "eval_overall_f1": 0.8190224570673711, + "eval_overall_precision": 0.7635467980295566, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3315, + "eval_samples_per_second": 564.041, + "eval_steps_per_second": 9.049, + "step": 6254 + }, + { + "epoch": 60.0, + "grad_norm": 2.1011621952056885, + "learning_rate": 2e-05, + "loss": 0.0471, + "step": 6360 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.8311688311688312, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7901234567901234, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7092198581560285, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6578947368421053, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8169014084507041, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7837837837837838, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14900296926498413, + "eval_overall_accuracy": 0.9613586020182131, + "eval_overall_f1": 0.8214765100671142, + "eval_overall_precision": 0.7766497461928934, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.901, + "eval_steps_per_second": 9.014, + "step": 6360 + }, + { + "epoch": 61.0, + "grad_norm": 1.5730093717575073, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.0457, + "step": 6466 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.8366013071895425, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6835443037974683, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7301587301587301, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6764705882352942, + "eval_QUANTITY_recall": 0.7931034482758621, + "eval_TIME_f1": 0.8285714285714286, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.8055555555555556, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14521339535713196, + "eval_overall_accuracy": 0.9623430962343096, + "eval_overall_f1": 0.82907133243607, + "eval_overall_precision": 0.7857142857142857, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3329, + "eval_samples_per_second": 561.784, + "eval_steps_per_second": 9.013, + "step": 6466 + }, + { + "epoch": 62.0, + "grad_norm": 0.7596818208694458, + "learning_rate": 1.9e-05, + "loss": 0.0453, + "step": 6572 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.8129032258064516, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7682926829268293, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7234042553191491, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6710526315789473, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7575757575757576, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1535465270280838, + "eval_overall_accuracy": 0.9608663549101649, + "eval_overall_f1": 0.8203753351206434, + "eval_overall_precision": 0.7746835443037975, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3263, + "eval_samples_per_second": 573.007, + "eval_steps_per_second": 9.193, + "step": 6572 + }, + { + "epoch": 63.0, + "grad_norm": 3.6123743057250977, + "learning_rate": 1.85e-05, + "loss": 0.0439, + "step": 6678 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.8129032258064516, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7682926829268293, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7297297297297298, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6506024096385542, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15502122044563293, + "eval_overall_accuracy": 0.9596357371400444, + "eval_overall_f1": 0.8218085106382977, + "eval_overall_precision": 0.770573566084788, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3317, + "eval_samples_per_second": 563.702, + "eval_steps_per_second": 9.043, + "step": 6678 + }, + { + "epoch": 64.0, + "grad_norm": 0.6118897199630737, + "learning_rate": 1.8e-05, + "loss": 0.0467, + "step": 6784 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.8289473684210525, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7974683544303798, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.6814814814814816, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7076923076923077, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7419354838709677, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.696969696969697, + "eval_QUANTITY_recall": 0.7931034482758621, + "eval_TIME_f1": 0.8169014084507041, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7837837837837838, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.14392048120498657, + "eval_overall_accuracy": 0.9613586020182131, + "eval_overall_f1": 0.8169398907103825, + "eval_overall_precision": 0.7847769028871391, + "eval_overall_recall": 0.8518518518518519, + "eval_runtime": 0.3318, + "eval_samples_per_second": 563.555, + "eval_steps_per_second": 9.041, + "step": 6784 + }, + { + "epoch": 65.0, + "grad_norm": 1.4808624982833862, + "learning_rate": 1.75e-05, + "loss": 0.0422, + "step": 6890 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.6956521739130435, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.160318523645401, + "eval_overall_accuracy": 0.9591434900319961, + "eval_overall_f1": 0.8185430463576159, + "eval_overall_precision": 0.7648514851485149, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.291, + "eval_steps_per_second": 9.037, + "step": 6890 + }, + { + "epoch": 66.0, + "grad_norm": 1.028084397315979, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0423, + "step": 6996 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6835443037974683, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7575757575757576, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1588248461484909, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.824468085106383, + "eval_overall_precision": 0.773067331670823, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3263, + "eval_samples_per_second": 573.167, + "eval_steps_per_second": 9.195, + "step": 6996 + }, + { + "epoch": 67.0, + "grad_norm": 1.2602914571762085, + "learning_rate": 1.65e-05, + "loss": 0.0422, + "step": 7102 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.8441558441558442, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.8024691358024691, + "eval_LOCATION_recall": 0.8904109589041096, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8169014084507041, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7837837837837838, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15185856819152832, + "eval_overall_accuracy": 0.9623430962343096, + "eval_overall_f1": 0.8282290279627165, + "eval_overall_precision": 0.7775, + "eval_overall_recall": 0.886039886039886, + "eval_runtime": 0.3269, + "eval_samples_per_second": 572.038, + "eval_steps_per_second": 9.177, + "step": 7102 + }, + { + "epoch": 68.0, + "grad_norm": 0.7608669996261597, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0416, + "step": 7208 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15464389324188232, + "eval_overall_accuracy": 0.9616047255722373, + "eval_overall_f1": 0.824468085106383, + "eval_overall_precision": 0.773067331670823, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.246, + "eval_steps_per_second": 9.036, + "step": 7208 + }, + { + "epoch": 69.0, + "grad_norm": 1.0217771530151367, + "learning_rate": 1.55e-05, + "loss": 0.0403, + "step": 7314 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.8235294117647058, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.75, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1511322259902954, + "eval_overall_accuracy": 0.9618508491262614, + "eval_overall_f1": 0.824631860776439, + "eval_overall_precision": 0.7777777777777778, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3329, + "eval_samples_per_second": 561.81, + "eval_steps_per_second": 9.013, + "step": 7314 + }, + { + "epoch": 70.0, + "grad_norm": 1.7045354843139648, + "learning_rate": 1.5e-05, + "loss": 0.0411, + "step": 7420 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15936115384101868, + "eval_overall_accuracy": 0.9598818606940684, + "eval_overall_f1": 0.8222811671087533, + "eval_overall_precision": 0.7692307692307693, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.176, + "eval_steps_per_second": 8.987, + "step": 7420 + }, + { + "epoch": 71.0, + "grad_norm": 1.8637417554855347, + "learning_rate": 1.45e-05, + "loss": 0.0391, + "step": 7526 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7225806451612903, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6222222222222222, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.17168858647346497, + "eval_overall_accuracy": 0.9561900073837066, + "eval_overall_f1": 0.8156862745098039, + "eval_overall_precision": 0.7536231884057971, + "eval_overall_recall": 0.8888888888888888, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.182, + "eval_steps_per_second": 9.035, + "step": 7526 + }, + { + "epoch": 72.0, + "grad_norm": 1.0552111864089966, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0393, + "step": 7632 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.8101265822784809, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7529411764705882, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7397260273972603, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16536851227283478, + "eval_overall_accuracy": 0.9584051193699237, + "eval_overall_f1": 0.8190224570673711, + "eval_overall_precision": 0.7635467980295566, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3264, + "eval_samples_per_second": 572.834, + "eval_steps_per_second": 9.19, + "step": 7632 + }, + { + "epoch": 73.0, + "grad_norm": 0.6572228670120239, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0411, + "step": 7738 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7323943661971831, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6753246753246753, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15696407854557037, + "eval_overall_accuracy": 0.9608663549101649, + "eval_overall_f1": 0.822429906542056, + "eval_overall_precision": 0.7738693467336684, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3332, + "eval_samples_per_second": 561.214, + "eval_steps_per_second": 9.003, + "step": 7738 + }, + { + "epoch": 74.0, + "grad_norm": 0.9044064283370972, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0408, + "step": 7844 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7323943661971831, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6753246753246753, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8475609756097561, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8169014084507041, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7837837837837838, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1582627296447754, + "eval_overall_accuracy": 0.9613586020182131, + "eval_overall_f1": 0.824, + "eval_overall_precision": 0.7744360902255639, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3319, + "eval_samples_per_second": 563.385, + "eval_steps_per_second": 9.038, + "step": 7844 + }, + { + "epoch": 75.0, + "grad_norm": 0.5799534916877747, + "learning_rate": 1.25e-05, + "loss": 0.0389, + "step": 7950 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7310344827586207, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6625, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7058823529411765, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6153846153846154, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16305667161941528, + "eval_overall_accuracy": 0.9591434900319961, + "eval_overall_f1": 0.8180610889774236, + "eval_overall_precision": 0.7661691542288557, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3349, + "eval_samples_per_second": 558.344, + "eval_steps_per_second": 8.957, + "step": 7950 + }, + { + "epoch": 76.0, + "grad_norm": 1.983021855354309, + "learning_rate": 1.2e-05, + "loss": 0.0377, + "step": 8056 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.15857818722724915, + "eval_overall_accuracy": 0.9613586020182131, + "eval_overall_f1": 0.8207171314741036, + "eval_overall_precision": 0.7686567164179104, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3261, + "eval_samples_per_second": 573.446, + "eval_steps_per_second": 9.2, + "step": 8056 + }, + { + "epoch": 77.0, + "grad_norm": 1.1891909837722778, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0404, + "step": 8162 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7272727272727273, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6486486486486487, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16166049242019653, + "eval_overall_accuracy": 0.9601279842480925, + "eval_overall_f1": 0.8207171314741036, + "eval_overall_precision": 0.7686567164179104, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3315, + "eval_samples_per_second": 564.102, + "eval_steps_per_second": 9.05, + "step": 8162 + }, + { + "epoch": 78.0, + "grad_norm": 0.46722444891929626, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0388, + "step": 8268 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7310344827586207, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6625, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8169014084507041, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7837837837837838, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16035309433937073, + "eval_overall_accuracy": 0.9618508491262614, + "eval_overall_f1": 0.823529411764706, + "eval_overall_precision": 0.7758186397984886, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.645, + "eval_steps_per_second": 9.026, + "step": 8268 + }, + { + "epoch": 79.0, + "grad_norm": 0.8686147332191467, + "learning_rate": 1.05e-05, + "loss": 0.0383, + "step": 8374 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7183098591549296, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6623376623376623, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16000543534755707, + "eval_overall_accuracy": 0.961112478464189, + "eval_overall_f1": 0.8192771084337349, + "eval_overall_precision": 0.7727272727272727, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3327, + "eval_samples_per_second": 561.998, + "eval_steps_per_second": 9.016, + "step": 8374 + }, + { + "epoch": 80.0, + "grad_norm": 0.674788773059845, + "learning_rate": 1e-05, + "loss": 0.0382, + "step": 8480 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7297297297297298, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6506024096385542, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.696969696969697, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6216216216216216, + "eval_QUANTITY_recall": 0.7931034482758621, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16522908210754395, + "eval_overall_accuracy": 0.9596357371400444, + "eval_overall_f1": 0.8169761273209548, + "eval_overall_precision": 0.7642679900744417, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3342, + "eval_samples_per_second": 559.61, + "eval_steps_per_second": 8.978, + "step": 8480 + }, + { + "epoch": 81.0, + "grad_norm": 1.6788341999053955, + "learning_rate": 9.5e-06, + "loss": 0.0378, + "step": 8586 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.8101265822784809, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7529411764705882, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7297297297297298, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6506024096385542, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7058823529411765, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6153846153846154, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.17060095071792603, + "eval_overall_accuracy": 0.9591434900319961, + "eval_overall_f1": 0.8153034300791556, + "eval_overall_precision": 0.7592137592137592, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3359, + "eval_samples_per_second": 556.794, + "eval_steps_per_second": 8.933, + "step": 8586 + }, + { + "epoch": 82.0, + "grad_norm": 0.7395833730697632, + "learning_rate": 9e-06, + "loss": 0.039, + "step": 8692 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.8101265822784809, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7529411764705882, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7272727272727273, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6486486486486487, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16823621094226837, + "eval_overall_accuracy": 0.9598818606940684, + "eval_overall_f1": 0.8196286472148541, + "eval_overall_precision": 0.7667493796526055, + "eval_overall_recall": 0.8803418803418803, + "eval_runtime": 0.3356, + "eval_samples_per_second": 557.222, + "eval_steps_per_second": 8.939, + "step": 8692 + }, + { + "epoch": 83.0, + "grad_norm": 0.7045236229896545, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0372, + "step": 8798 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.6944444444444444, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8726114649681529, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7272727272727273, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6486486486486487, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16675511002540588, + "eval_overall_accuracy": 0.958897366477972, + "eval_overall_f1": 0.8095872170439414, + "eval_overall_precision": 0.76, + "eval_overall_recall": 0.8660968660968661, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.306, + "eval_steps_per_second": 9.021, + "step": 8798 + }, + { + "epoch": 84.0, + "grad_norm": 2.7517426013946533, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0368, + "step": 8904 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1667993813753128, + "eval_overall_accuracy": 0.9598818606940684, + "eval_overall_f1": 0.8211920529801325, + "eval_overall_precision": 0.7673267326732673, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.958, + "eval_steps_per_second": 9.015, + "step": 8904 + }, + { + "epoch": 85.0, + "grad_norm": 2.215299606323242, + "learning_rate": 7.5e-06, + "loss": 0.0374, + "step": 9010 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7194244604316546, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6756756756756757, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16433657705783844, + "eval_overall_accuracy": 0.9606202313561408, + "eval_overall_f1": 0.8176943699731903, + "eval_overall_precision": 0.7721518987341772, + "eval_overall_recall": 0.8689458689458689, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.286, + "eval_steps_per_second": 9.165, + "step": 9010 + }, + { + "epoch": 86.0, + "grad_norm": 0.40606042742729187, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0369, + "step": 9116 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7083333333333334, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6455696202531646, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1689991056919098, + "eval_overall_accuracy": 0.9584051193699237, + "eval_overall_f1": 0.8154050464807436, + "eval_overall_precision": 0.763681592039801, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.979, + "eval_steps_per_second": 9.016, + "step": 9116 + }, + { + "epoch": 87.0, + "grad_norm": 1.3081475496292114, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.036, + "step": 9222 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7132867132867133, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6538461538461539, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7272727272727273, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6486486486486487, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16559894382953644, + "eval_overall_accuracy": 0.9596357371400444, + "eval_overall_f1": 0.8170894526034712, + "eval_overall_precision": 0.7688442211055276, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3315, + "eval_samples_per_second": 564.057, + "eval_steps_per_second": 9.049, + "step": 9222 + }, + { + "epoch": 88.0, + "grad_norm": 0.7401718497276306, + "learning_rate": 6e-06, + "loss": 0.037, + "step": 9328 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7132867132867133, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6538461538461539, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7272727272727273, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6486486486486487, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16509054601192474, + "eval_overall_accuracy": 0.9603741078021166, + "eval_overall_f1": 0.8170894526034712, + "eval_overall_precision": 0.7688442211055276, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.977, + "eval_steps_per_second": 9.032, + "step": 9328 + }, + { + "epoch": 89.0, + "grad_norm": 0.570732057094574, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0373, + "step": 9434 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7172413793103448, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.65, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16918015480041504, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.8164893617021275, + "eval_overall_precision": 0.7655860349127181, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.287, + "eval_steps_per_second": 9.165, + "step": 9434 + }, + { + "epoch": 90.0, + "grad_norm": 1.0674538612365723, + "learning_rate": 5e-06, + "loss": 0.0357, + "step": 9540 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7092198581560285, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6578947368421053, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16437728703022003, + "eval_overall_accuracy": 0.9603741078021166, + "eval_overall_f1": 0.8155080213903743, + "eval_overall_precision": 0.7682619647355163, + "eval_overall_recall": 0.8689458689458689, + "eval_runtime": 0.327, + "eval_samples_per_second": 571.778, + "eval_steps_per_second": 9.173, + "step": 9540 + }, + { + "epoch": 91.0, + "grad_norm": 0.779975414276123, + "learning_rate": 4.5e-06, + "loss": 0.0365, + "step": 9646 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1706375926733017, + "eval_overall_accuracy": 0.9596357371400444, + "eval_overall_f1": 0.8211920529801325, + "eval_overall_precision": 0.7673267326732673, + "eval_overall_recall": 0.8831908831908832, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.858, + "eval_steps_per_second": 9.014, + "step": 9646 + }, + { + "epoch": 92.0, + "grad_norm": 1.6607707738876343, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0358, + "step": 9752 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7210884353741497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6463414634146342, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16552191972732544, + "eval_overall_accuracy": 0.9606202313561408, + "eval_overall_f1": 0.8158940397350993, + "eval_overall_precision": 0.7623762376237624, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3345, + "eval_samples_per_second": 559.095, + "eval_steps_per_second": 8.969, + "step": 9752 + }, + { + "epoch": 93.0, + "grad_norm": 0.364473819732666, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0367, + "step": 9858 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7272727272727272, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16641728579998016, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.8164893617021275, + "eval_overall_precision": 0.7655860349127181, + "eval_overall_recall": 0.8746438746438746, + "eval_runtime": 0.3264, + "eval_samples_per_second": 572.903, + "eval_steps_per_second": 9.191, + "step": 9858 + }, + { + "epoch": 94.0, + "grad_norm": 1.1335537433624268, + "learning_rate": 3e-06, + "loss": 0.0367, + "step": 9964 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.8076923076923077, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7590361445783133, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7123287671232877, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6419753086419753, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16745708882808685, + "eval_overall_accuracy": 0.958897366477972, + "eval_overall_f1": 0.8116710875331565, + "eval_overall_precision": 0.7593052109181141, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3331, + "eval_samples_per_second": 561.459, + "eval_steps_per_second": 9.007, + "step": 9964 + }, + { + "epoch": 95.0, + "grad_norm": 2.660787582397461, + "learning_rate": 2.5e-06, + "loss": 0.0358, + "step": 10070 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.8076923076923077, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7590361445783133, + "eval_LOCATION_recall": 0.863013698630137, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7058823529411765, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6153846153846154, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.17253383994102478, + "eval_overall_accuracy": 0.9591434900319961, + "eval_overall_f1": 0.8148148148148149, + "eval_overall_precision": 0.7604938271604939, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.55, + "eval_steps_per_second": 9.025, + "step": 10070 + }, + { + "epoch": 96.0, + "grad_norm": 1.2606465816497803, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0374, + "step": 10176 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7123287671232877, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6419753086419753, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8620689655172413, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1706884801387787, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.8158940397350993, + "eval_overall_precision": 0.7623762376237624, + "eval_overall_recall": 0.8774928774928775, + "eval_runtime": 0.3336, + "eval_samples_per_second": 560.539, + "eval_steps_per_second": 8.993, + "step": 10176 + }, + { + "epoch": 97.0, + "grad_norm": 2.224013090133667, + "learning_rate": 1.5e-06, + "loss": 0.0342, + "step": 10282 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7132867132867133, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6538461538461539, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16819004714488983, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.8149134487350199, + "eval_overall_precision": 0.765, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.071, + "eval_steps_per_second": 9.017, + "step": 10282 + }, + { + "epoch": 98.0, + "grad_norm": 1.7407749891281128, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0351, + "step": 10388 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.8205128205128205, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.7234042553191491, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6710526315789473, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16785794496536255, + "eval_overall_accuracy": 0.9593896135860202, + "eval_overall_f1": 0.8170894526034712, + "eval_overall_precision": 0.7688442211055276, + "eval_overall_recall": 0.8717948717948718, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.612, + "eval_steps_per_second": 9.01, + "step": 10388 + }, + { + "epoch": 99.0, + "grad_norm": 0.9356883764266968, + "learning_rate": 5.000000000000001e-07, + "loss": 0.036, + "step": 10494 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.6993006993006994, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6410256410256411, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.1689637303352356, + "eval_overall_accuracy": 0.9586512429239479, + "eval_overall_f1": 0.8111702127659575, + "eval_overall_precision": 0.7605985037406484, + "eval_overall_recall": 0.8689458689458689, + "eval_runtime": 0.3253, + "eval_samples_per_second": 574.83, + "eval_steps_per_second": 9.222, + "step": 10494 + }, + { + "epoch": 100.0, + "grad_norm": 0.6478390693664551, + "learning_rate": 0.0, + "loss": 0.0368, + "step": 10600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.8152866242038217, + "eval_LOCATION_number": 73, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.8767123287671232, + "eval_ORGANIZATION_f1": 0.6993006993006994, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6410256410256411, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8817891373801918, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.716417910447761, + "eval_QUANTITY_number": 29, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8275862068965517, + "eval_TIME_f1": 0.8055555555555555, + "eval_TIME_number": 34, + "eval_TIME_precision": 0.7631578947368421, + "eval_TIME_recall": 0.8529411764705882, + "eval_loss": 0.16865810751914978, + "eval_overall_accuracy": 0.9586512429239479, + "eval_overall_f1": 0.8111702127659575, + "eval_overall_precision": 0.7605985037406484, + "eval_overall_recall": 0.8689458689458689, + "eval_runtime": 0.3331, + "eval_samples_per_second": 561.343, + "eval_steps_per_second": 9.006, + "step": 10600 + }, + { + "epoch": 100.0, + "step": 10600, + "total_flos": 4567780799240064.0, + "train_loss": 0.08414012978661735, + "train_runtime": 604.7606, + "train_samples_per_second": 279.119, + "train_steps_per_second": 17.528 + } + ], + "logging_steps": 500, + "max_steps": 10600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 4567780799240064.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}