diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3930 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 10600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 2.8471529483795166, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.9847, + "step": 106 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.022988505747126436, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.08333333333333333, + "eval_PERSON_recall": 0.013333333333333334, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.0, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.0, + "eval_TIME_recall": 0.0, + "eval_loss": 0.5584226250648499, + "eval_overall_accuracy": 0.8448108632395732, + "eval_overall_f1": 0.010256410256410256, + "eval_overall_precision": 0.046511627906976744, + "eval_overall_recall": 0.005763688760806916, + "eval_runtime": 0.9693, + "eval_samples_per_second": 192.917, + "eval_steps_per_second": 3.095, + "step": 106 + }, + { + "epoch": 2.0, + "grad_norm": 1.1584385633468628, + "learning_rate": 4.9e-05, + "loss": 0.4691, + "step": 212 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.4137931034482758, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5333333333333333, + "eval_LOCATION_recall": 0.3380281690140845, + "eval_ORGANIZATION_f1": 0.10769230769230768, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.1076923076923077, + "eval_ORGANIZATION_recall": 0.1076923076923077, + "eval_PERSON_f1": 0.735632183908046, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.6464646464646465, + "eval_PERSON_recall": 0.8533333333333334, + "eval_QUANTITY_f1": 0.4155844155844156, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.36363636363636365, + "eval_QUANTITY_recall": 0.48484848484848486, + "eval_TIME_f1": 0.7017543859649122, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.6896551724137931, + "eval_TIME_recall": 0.7142857142857143, + "eval_loss": 0.3233771026134491, + "eval_overall_accuracy": 0.9034917555771096, + "eval_overall_f1": 0.5357142857142857, + "eval_overall_precision": 0.5118110236220472, + "eval_overall_recall": 0.5619596541786743, + "eval_runtime": 0.9861, + "eval_samples_per_second": 189.633, + "eval_steps_per_second": 3.042, + "step": 212 + }, + { + "epoch": 3.0, + "grad_norm": 1.0544712543487549, + "learning_rate": 4.85e-05, + "loss": 0.2843, + "step": 318 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.631578947368421, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5925925925925926, + "eval_LOCATION_recall": 0.676056338028169, + "eval_ORGANIZATION_f1": 0.4415584415584416, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.38202247191011235, + "eval_ORGANIZATION_recall": 0.5230769230769231, + "eval_PERSON_f1": 0.8322981366459627, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7790697674418605, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.5789473684210527, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.5116279069767442, + "eval_QUANTITY_recall": 0.6666666666666666, + "eval_TIME_f1": 0.7924528301886793, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.84, + "eval_TIME_recall": 0.75, + "eval_loss": 0.1987062692642212, + "eval_overall_accuracy": 0.9325897187196897, + "eval_overall_f1": 0.6842800528401585, + "eval_overall_precision": 0.6317073170731707, + "eval_overall_recall": 0.7463976945244957, + "eval_runtime": 1.0128, + "eval_samples_per_second": 184.633, + "eval_steps_per_second": 2.962, + "step": 318 + }, + { + "epoch": 4.0, + "grad_norm": 0.8043866157531738, + "learning_rate": 4.8e-05, + "loss": 0.2104, + "step": 424 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.6790123456790124, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6043956043956044, + "eval_LOCATION_recall": 0.7746478873239436, + "eval_ORGANIZATION_f1": 0.6428571428571429, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5242718446601942, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8466257668711655, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7840909090909091, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.6329113924050633, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.5434782608695652, + "eval_QUANTITY_recall": 0.7575757575757576, + "eval_TIME_f1": 0.8363636363636364, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8518518518518519, + "eval_TIME_recall": 0.8214285714285714, + "eval_loss": 0.16895577311515808, + "eval_overall_accuracy": 0.9451988360814743, + "eval_overall_f1": 0.7468354430379747, + "eval_overall_precision": 0.6659142212189616, + "eval_overall_recall": 0.8501440922190202, + "eval_runtime": 0.9716, + "eval_samples_per_second": 192.476, + "eval_steps_per_second": 3.088, + "step": 424 + }, + { + "epoch": 5.0, + "grad_norm": 1.0541787147521973, + "learning_rate": 4.75e-05, + "loss": 0.181, + "step": 530 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.6909090909090909, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6063829787234043, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.6751592356687899, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5760869565217391, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8535825545171339, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8011695906432749, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7250000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6170212765957447, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7027027027027027, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.16010892391204834, + "eval_overall_accuracy": 0.9451988360814743, + "eval_overall_f1": 0.766497461928934, + "eval_overall_precision": 0.6848072562358276, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.9657, + "eval_samples_per_second": 193.642, + "eval_steps_per_second": 3.107, + "step": 530 + }, + { + "epoch": 6.0, + "grad_norm": 1.3196969032287598, + "learning_rate": 4.7e-05, + "loss": 0.1652, + "step": 636 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.6867469879518073, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.6832298136645962, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5729166666666666, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8509316770186336, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7965116279069767, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.6136363636363636, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.4909090909090909, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.6666666666666667, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.5609756097560976, + "eval_TIME_recall": 0.8214285714285714, + "eval_loss": 0.17207954823970795, + "eval_overall_accuracy": 0.9364694471387003, + "eval_overall_f1": 0.7419354838709677, + "eval_overall_precision": 0.6514161220043573, + "eval_overall_recall": 0.861671469740634, + "eval_runtime": 0.9481, + "eval_samples_per_second": 197.23, + "eval_steps_per_second": 3.164, + "step": 636 + }, + { + "epoch": 7.0, + "grad_norm": 2.4317102432250977, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.1508, + "step": 742 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.7204968944099378, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6444444444444445, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5894736842105263, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8509316770186336, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7965116279069767, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7466666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.14870180189609528, + "eval_overall_accuracy": 0.9495635305528612, + "eval_overall_f1": 0.781491002570694, + "eval_overall_precision": 0.7053364269141531, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9672, + "eval_samples_per_second": 193.333, + "eval_steps_per_second": 3.102, + "step": 742 + }, + { + "epoch": 8.0, + "grad_norm": 0.6843542456626892, + "learning_rate": 4.600000000000001e-05, + "loss": 0.1354, + "step": 848 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.7151515151515151, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6276595744680851, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7169811320754718, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6063829787234043, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8679245283018867, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8214285714285714, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.736842105263158, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6511627906976745, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1432064324617386, + "eval_overall_accuracy": 0.9507759456838021, + "eval_overall_f1": 0.7912371134020619, + "eval_overall_precision": 0.7156177156177156, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 1.0001, + "eval_samples_per_second": 186.989, + "eval_steps_per_second": 3.0, + "step": 848 + }, + { + "epoch": 9.0, + "grad_norm": 1.1372586488723755, + "learning_rate": 4.55e-05, + "loss": 0.1315, + "step": 954 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.7261146496815286, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6627906976744186, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7151515151515151, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.59, + "eval_ORGANIZATION_recall": 0.9076923076923077, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.675, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.574468085106383, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1347617655992508, + "eval_overall_accuracy": 0.9512609117361784, + "eval_overall_f1": 0.7927461139896372, + "eval_overall_precision": 0.72, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9806, + "eval_samples_per_second": 190.709, + "eval_steps_per_second": 3.059, + "step": 954 + }, + { + "epoch": 10.0, + "grad_norm": 1.2374799251556396, + "learning_rate": 4.5e-05, + "loss": 0.1284, + "step": 1060 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.7866666666666666, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7468354430379747, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7586206896551724, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8761904761904761, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8363636363636363, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.819672131147541, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7575757575757576, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.12180344015359879, + "eval_overall_accuracy": 0.9587778855480117, + "eval_overall_f1": 0.8161073825503355, + "eval_overall_precision": 0.7638190954773869, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9802, + "eval_samples_per_second": 190.777, + "eval_steps_per_second": 3.061, + "step": 1060 + }, + { + "epoch": 11.0, + "grad_norm": 1.0754990577697754, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.1172, + "step": 1166 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.7368421052631579, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.691358024691358, + "eval_LOCATION_recall": 0.7887323943661971, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6551724137931034, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.887459807073955, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8571428571428571, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.6835443037974683, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.5869565217391305, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.7741935483870968, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7058823529411765, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13840417563915253, + "eval_overall_accuracy": 0.9529582929194956, + "eval_overall_f1": 0.7989417989417988, + "eval_overall_precision": 0.7383863080684596, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.9779, + "eval_samples_per_second": 191.22, + "eval_steps_per_second": 3.068, + "step": 1166 + }, + { + "epoch": 12.0, + "grad_norm": 1.2686794996261597, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.1155, + "step": 1272 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.7169811320754716, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6477272727272727, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7341772151898733, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6236559139784946, + "eval_ORGANIZATION_recall": 0.8923076923076924, + "eval_PERSON_f1": 0.8726114649681529, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.7575757575757577, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.6578947368421053, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.14894507825374603, + "eval_overall_accuracy": 0.9500484966052376, + "eval_overall_f1": 0.7911802853437095, + "eval_overall_precision": 0.7193396226415094, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9687, + "eval_samples_per_second": 193.048, + "eval_steps_per_second": 3.097, + "step": 1272 + }, + { + "epoch": 13.0, + "grad_norm": 0.8799005150794983, + "learning_rate": 4.35e-05, + "loss": 0.1086, + "step": 1378 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.7733333333333333, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7341772151898734, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7672955974842768, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.648936170212766, + "eval_ORGANIZATION_recall": 0.9384615384615385, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.6585365853658536, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.5510204081632653, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13837367296218872, + "eval_overall_accuracy": 0.953443258971872, + "eval_overall_f1": 0.8052287581699346, + "eval_overall_precision": 0.7368421052631579, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.9843, + "eval_samples_per_second": 189.982, + "eval_steps_per_second": 3.048, + "step": 1378 + }, + { + "epoch": 14.0, + "grad_norm": 1.3266977071762085, + "learning_rate": 4.3e-05, + "loss": 0.105, + "step": 1484 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.7972972972972973, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7662337662337663, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.76, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6705882352941176, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.6923076923076923, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1306816190481186, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8133333333333335, + "eval_overall_precision": 0.7568238213399504, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9815, + "eval_samples_per_second": 190.521, + "eval_steps_per_second": 3.056, + "step": 1484 + }, + { + "epoch": 15.0, + "grad_norm": 0.9789202809333801, + "learning_rate": 4.25e-05, + "loss": 0.1018, + "step": 1590 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.7450980392156863, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6951219512195121, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7341772151898733, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6236559139784946, + "eval_ORGANIZATION_recall": 0.8923076923076924, + "eval_PERSON_f1": 0.8910256410256411, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8580246913580247, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7105263157894738, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.627906976744186, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.7868852459016394, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7272727272727273, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.135343536734581, + "eval_overall_accuracy": 0.9544131910766246, + "eval_overall_f1": 0.8026315789473684, + "eval_overall_precision": 0.738498789346247, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9637, + "eval_samples_per_second": 194.034, + "eval_steps_per_second": 3.113, + "step": 1590 + }, + { + "epoch": 16.0, + "grad_norm": 1.4792687892913818, + "learning_rate": 4.2e-05, + "loss": 0.0971, + "step": 1696 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.7733333333333333, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7341772151898734, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7483870967741936, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6444444444444445, + "eval_ORGANIZATION_recall": 0.8923076923076924, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.6506024096385542, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.54, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8474576271186439, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1399880051612854, + "eval_overall_accuracy": 0.9551406401551892, + "eval_overall_f1": 0.8110964332892999, + "eval_overall_precision": 0.748780487804878, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.9704, + "eval_samples_per_second": 192.707, + "eval_steps_per_second": 3.092, + "step": 1696 + }, + { + "epoch": 17.0, + "grad_norm": 0.6556410193443298, + "learning_rate": 4.15e-05, + "loss": 0.0934, + "step": 1802 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.7741935483870968, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7142857142857143, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7724137931034483, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8903225806451613, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7123287671232875, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.65, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.7936507936507937, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7142857142857143, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1313389092683792, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8176943699731903, + "eval_overall_precision": 0.7644110275689223, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9847, + "eval_samples_per_second": 189.91, + "eval_steps_per_second": 3.047, + "step": 1802 + }, + { + "epoch": 18.0, + "grad_norm": 0.7125768661499023, + "learning_rate": 4.1e-05, + "loss": 0.0921, + "step": 1908 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.7741935483870968, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7142857142857143, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7295597484276729, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6170212765957447, + "eval_ORGANIZATION_recall": 0.8923076923076924, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8475609756097561, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13860563933849335, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.8120893561103809, + "eval_overall_precision": 0.7463768115942029, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.9729, + "eval_samples_per_second": 192.203, + "eval_steps_per_second": 3.083, + "step": 1908 + }, + { + "epoch": 19.0, + "grad_norm": 0.7773716449737549, + "learning_rate": 4.05e-05, + "loss": 0.0898, + "step": 2014 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.7619047619047618, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7368421052631579, + "eval_LOCATION_recall": 0.7887323943661971, + "eval_ORGANIZATION_f1": 0.757142857142857, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7066666666666667, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8275862068965518, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13089829683303833, + "eval_overall_accuracy": 0.9599903006789525, + "eval_overall_f1": 0.8214285714285715, + "eval_overall_precision": 0.7847769028871391, + "eval_overall_recall": 0.861671469740634, + "eval_runtime": 0.9698, + "eval_samples_per_second": 192.821, + "eval_steps_per_second": 3.093, + "step": 2014 + }, + { + "epoch": 20.0, + "grad_norm": 0.5924017429351807, + "learning_rate": 4e-05, + "loss": 0.088, + "step": 2120 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.7894736842105262, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7407407407407407, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7702702702702702, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6867469879518072, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.896103896103896, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8734177215189873, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8275862068965518, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13513679802417755, + "eval_overall_accuracy": 0.9599903006789525, + "eval_overall_f1": 0.827027027027027, + "eval_overall_precision": 0.7786259541984732, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9796, + "eval_samples_per_second": 190.9, + "eval_steps_per_second": 3.063, + "step": 2120 + }, + { + "epoch": 21.0, + "grad_norm": 0.6240903735160828, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.084, + "step": 2226 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.7922077922077921, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7124999999999999, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8789808917197452, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8928571428571429, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8928571428571429, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13887803256511688, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.8201058201058202, + "eval_overall_precision": 0.7579462102689487, + "eval_overall_recall": 0.8933717579250721, + "eval_runtime": 0.991, + "eval_samples_per_second": 188.691, + "eval_steps_per_second": 3.027, + "step": 2226 + }, + { + "epoch": 22.0, + "grad_norm": 0.8279309868812561, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.0816, + "step": 2332 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.7564102564102564, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6941176470588235, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7160493827160493, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5979381443298969, + "eval_ORGANIZATION_recall": 0.8923076923076924, + "eval_PERSON_f1": 0.8903225806451613, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.675, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.574468085106383, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8727272727272727, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8888888888888888, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.15054349601268768, + "eval_overall_accuracy": 0.9527158098933075, + "eval_overall_f1": 0.8020969855832241, + "eval_overall_precision": 0.7355769230769231, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9796, + "eval_samples_per_second": 190.899, + "eval_steps_per_second": 3.063, + "step": 2332 + }, + { + "epoch": 23.0, + "grad_norm": 1.0779181718826294, + "learning_rate": 3.85e-05, + "loss": 0.0785, + "step": 2438 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.7516778523489933, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.717948717948718, + "eval_LOCATION_recall": 0.7887323943661971, + "eval_ORGANIZATION_f1": 0.7549668874172185, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6627906976744186, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13867738842964172, + "eval_overall_accuracy": 0.9592628516003879, + "eval_overall_f1": 0.8161073825503355, + "eval_overall_precision": 0.7638190954773869, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9722, + "eval_samples_per_second": 192.352, + "eval_steps_per_second": 3.086, + "step": 2438 + }, + { + "epoch": 24.0, + "grad_norm": 0.6904307007789612, + "learning_rate": 3.8e-05, + "loss": 0.0765, + "step": 2544 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.7848101265822784, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7126436781609196, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7564102564102564, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6483516483516484, + "eval_ORGANIZATION_recall": 0.9076923076923077, + "eval_PERSON_f1": 0.8846153846153846, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7123287671232875, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.65, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.14542602002620697, + "eval_overall_accuracy": 0.9544131910766246, + "eval_overall_f1": 0.8142292490118578, + "eval_overall_precision": 0.75, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.9686, + "eval_samples_per_second": 193.061, + "eval_steps_per_second": 3.097, + "step": 2544 + }, + { + "epoch": 25.0, + "grad_norm": 0.6373988389968872, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0741, + "step": 2650 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7307692307692307, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6263736263736264, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8782051282051282, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.845679012345679, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7105263157894738, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.627906976744186, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.16748766601085663, + "eval_overall_accuracy": 0.9507759456838021, + "eval_overall_f1": 0.8037135278514588, + "eval_overall_precision": 0.7444717444717445, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.9739, + "eval_samples_per_second": 192.014, + "eval_steps_per_second": 3.08, + "step": 2650 + }, + { + "epoch": 26.0, + "grad_norm": 0.7710668444633484, + "learning_rate": 3.7e-05, + "loss": 0.074, + "step": 2756 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.794701986754967, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7412587412587412, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6794871794871795, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8753993610223643, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8404907975460123, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7105263157894738, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.627906976744186, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1493009477853775, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8129205921938089, + "eval_overall_precision": 0.7626262626262627, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.9856, + "eval_samples_per_second": 189.727, + "eval_steps_per_second": 3.044, + "step": 2756 + }, + { + "epoch": 27.0, + "grad_norm": 0.6103322505950928, + "learning_rate": 3.65e-05, + "loss": 0.0696, + "step": 2862 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.7741935483870968, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7142857142857143, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8825396825396825, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7105263157894738, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.627906976744186, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.806451612903226, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7352941176470589, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.16694264113903046, + "eval_overall_accuracy": 0.9510184287099903, + "eval_overall_f1": 0.8068331143232589, + "eval_overall_precision": 0.7415458937198067, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.9829, + "eval_samples_per_second": 190.244, + "eval_steps_per_second": 3.052, + "step": 2862 + }, + { + "epoch": 28.0, + "grad_norm": 1.0529004335403442, + "learning_rate": 3.6e-05, + "loss": 0.0689, + "step": 2968 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.7843137254901961, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7317073170731707, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.708860759493671, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6021505376344086, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7200000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.16258035600185394, + "eval_overall_accuracy": 0.9522308438409312, + "eval_overall_f1": 0.808454425363276, + "eval_overall_precision": 0.7463414634146341, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9859, + "eval_samples_per_second": 189.671, + "eval_steps_per_second": 3.043, + "step": 2968 + }, + { + "epoch": 29.0, + "grad_norm": 0.6359916925430298, + "learning_rate": 3.55e-05, + "loss": 0.0674, + "step": 3074 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.7692307692307694, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7058823529411765, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7017543859649122, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5660377358490566, + "eval_ORGANIZATION_recall": 0.9230769230769231, + "eval_PERSON_f1": 0.896103896103896, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8734177215189873, + "eval_PERSON_recall": 0.92, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.15431419014930725, + "eval_overall_accuracy": 0.9541707080504365, + "eval_overall_f1": 0.8083441981747066, + "eval_overall_precision": 0.7380952380952381, + "eval_overall_recall": 0.8933717579250721, + "eval_runtime": 0.9786, + "eval_samples_per_second": 191.089, + "eval_steps_per_second": 3.066, + "step": 3074 + }, + { + "epoch": 30.0, + "grad_norm": 0.6515541672706604, + "learning_rate": 3.5e-05, + "loss": 0.0677, + "step": 3180 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.7755102040816326, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7712418300653594, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6704545454545454, + "eval_ORGANIZATION_recall": 0.9076923076923077, + "eval_PERSON_f1": 0.90032154340836, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8695652173913043, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8727272727272727, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8888888888888888, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.14396275579929352, + "eval_overall_accuracy": 0.9597478176527643, + "eval_overall_f1": 0.8308525033829499, + "eval_overall_precision": 0.7831632653061225, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.9848, + "eval_samples_per_second": 189.885, + "eval_steps_per_second": 3.046, + "step": 3180 + }, + { + "epoch": 31.0, + "grad_norm": 0.44519537687301636, + "learning_rate": 3.45e-05, + "loss": 0.066, + "step": 3286 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.794701986754967, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.76, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6705882352941176, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8974358974358974, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8641975308641975, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7466666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.806451612903226, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7352941176470589, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.16057845950126648, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8266666666666667, + "eval_overall_precision": 0.7692307692307693, + "eval_overall_recall": 0.8933717579250721, + "eval_runtime": 0.9671, + "eval_samples_per_second": 193.362, + "eval_steps_per_second": 3.102, + "step": 3286 + }, + { + "epoch": 32.0, + "grad_norm": 0.4462006688117981, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.0631, + "step": 3392 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.735483870967742, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6785714285714286, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.6951219512195123, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5757575757575758, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7200000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8571428571428571, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1714225858449936, + "eval_overall_accuracy": 0.9510184287099903, + "eval_overall_f1": 0.7989487516425756, + "eval_overall_precision": 0.7342995169082126, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9764, + "eval_samples_per_second": 191.514, + "eval_steps_per_second": 3.072, + "step": 3392 + }, + { + "epoch": 33.0, + "grad_norm": 0.6689063310623169, + "learning_rate": 3.35e-05, + "loss": 0.06, + "step": 3498 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.7721518987341772, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7011494252873564, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.888178913738019, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.852760736196319, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8275862068965518, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.16552795469760895, + "eval_overall_accuracy": 0.9507759456838021, + "eval_overall_f1": 0.8121693121693121, + "eval_overall_precision": 0.7506112469437652, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.9784, + "eval_samples_per_second": 191.123, + "eval_steps_per_second": 3.066, + "step": 3498 + }, + { + "epoch": 34.0, + "grad_norm": 0.517295777797699, + "learning_rate": 3.3e-05, + "loss": 0.0626, + "step": 3604 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.7733333333333333, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7341772151898734, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7105263157894738, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.627906976744186, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1590520441532135, + "eval_overall_accuracy": 0.954898157129001, + "eval_overall_f1": 0.8155080213903744, + "eval_overall_precision": 0.7605985037406484, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9758, + "eval_samples_per_second": 191.632, + "eval_steps_per_second": 3.074, + "step": 3604 + }, + { + "epoch": 35.0, + "grad_norm": 0.3203381896018982, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0611, + "step": 3710 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.7662337662337662, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7169811320754718, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6063829787234043, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8910256410256411, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8580246913580247, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8474576271186439, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.17437446117401123, + "eval_overall_accuracy": 0.950533462657614, + "eval_overall_f1": 0.8137384412153236, + "eval_overall_precision": 0.751219512195122, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.9833, + "eval_samples_per_second": 190.169, + "eval_steps_per_second": 3.051, + "step": 3710 + }, + { + "epoch": 36.0, + "grad_norm": 0.7219709753990173, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0574, + "step": 3816 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7702702702702702, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6867469879518072, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.161631777882576, + "eval_overall_accuracy": 0.954898157129001, + "eval_overall_f1": 0.8213333333333332, + "eval_overall_precision": 0.7642679900744417, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.9763, + "eval_samples_per_second": 191.539, + "eval_steps_per_second": 3.073, + "step": 3816 + }, + { + "epoch": 37.0, + "grad_norm": 1.1681548357009888, + "learning_rate": 3.15e-05, + "loss": 0.0572, + "step": 3922 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.7439024390243902, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6559139784946236, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6993865030674845, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5816326530612245, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.888888888888889, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8484848484848485, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1842304915189743, + "eval_overall_accuracy": 0.9478661493695442, + "eval_overall_f1": 0.8031088082901555, + "eval_overall_precision": 0.7294117647058823, + "eval_overall_recall": 0.8933717579250721, + "eval_runtime": 0.9771, + "eval_samples_per_second": 191.378, + "eval_steps_per_second": 3.07, + "step": 3922 + }, + { + "epoch": 38.0, + "grad_norm": 0.9379565715789795, + "learning_rate": 3.1e-05, + "loss": 0.0571, + "step": 4028 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.7333333333333333, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6962025316455697, + "eval_LOCATION_recall": 0.7746478873239436, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.9096774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.88125, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7466666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8474576271186439, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.16781190037727356, + "eval_overall_accuracy": 0.9539282250242483, + "eval_overall_f1": 0.8178137651821863, + "eval_overall_precision": 0.7690355329949239, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.981, + "eval_samples_per_second": 190.62, + "eval_steps_per_second": 3.058, + "step": 4028 + }, + { + "epoch": 39.0, + "grad_norm": 0.7336277961730957, + "learning_rate": 3.05e-05, + "loss": 0.0593, + "step": 4134 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.7547169811320754, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6818181818181818, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7307692307692307, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6263736263736264, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.9038461538461539, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8703703703703703, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8571428571428571, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.17119500041007996, + "eval_overall_accuracy": 0.9522308438409312, + "eval_overall_f1": 0.8163804491413474, + "eval_overall_precision": 0.7536585365853659, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.9811, + "eval_samples_per_second": 190.605, + "eval_steps_per_second": 3.058, + "step": 4134 + }, + { + "epoch": 40.0, + "grad_norm": 0.4600437879562378, + "learning_rate": 3e-05, + "loss": 0.0541, + "step": 4240 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.7564102564102564, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6941176470588235, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7361111111111112, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6708860759493671, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8974358974358974, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8641975308641975, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.16966694593429565, + "eval_overall_accuracy": 0.9536857419980601, + "eval_overall_f1": 0.8150134048257373, + "eval_overall_precision": 0.7619047619047619, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9756, + "eval_samples_per_second": 191.668, + "eval_steps_per_second": 3.075, + "step": 4240 + }, + { + "epoch": 41.0, + "grad_norm": 0.5247196555137634, + "learning_rate": 2.95e-05, + "loss": 0.0533, + "step": 4346 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.738255033557047, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7051282051282052, + "eval_LOCATION_recall": 0.7746478873239436, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6551724137931034, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.8974358974358974, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8641975308641975, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7466666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.16588056087493896, + "eval_overall_accuracy": 0.9544131910766246, + "eval_overall_f1": 0.8176943699731903, + "eval_overall_precision": 0.7644110275689223, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.97, + "eval_samples_per_second": 192.784, + "eval_steps_per_second": 3.093, + "step": 4346 + }, + { + "epoch": 42.0, + "grad_norm": 0.8778635859489441, + "learning_rate": 2.9e-05, + "loss": 0.0528, + "step": 4452 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.9067524115755627, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8757763975155279, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.17160455882549286, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8263795423956932, + "eval_overall_precision": 0.7752525252525253, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.9639, + "eval_samples_per_second": 194.002, + "eval_steps_per_second": 3.112, + "step": 4452 + }, + { + "epoch": 43.0, + "grad_norm": 0.8205474615097046, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.051, + "step": 4558 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.7612903225806451, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7517730496453899, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6973684210526315, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8974358974358974, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8641975308641975, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.896551724137931, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1667357236146927, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.8254397834912043, + "eval_overall_precision": 0.7780612244897959, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9755, + "eval_samples_per_second": 191.693, + "eval_steps_per_second": 3.075, + "step": 4558 + }, + { + "epoch": 44.0, + "grad_norm": 0.5872923731803894, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0534, + "step": 4664 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7534246575342466, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6790123456790124, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8917197452229298, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8536585365853658, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7466666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.15715567767620087, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.824631860776439, + "eval_overall_precision": 0.77, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.9676, + "eval_samples_per_second": 193.26, + "eval_steps_per_second": 3.1, + "step": 4664 + }, + { + "epoch": 45.0, + "grad_norm": 2.212918996810913, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0479, + "step": 4770 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7297297297297298, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6506024096385542, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8974358974358974, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8641975308641975, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1691504418849945, + "eval_overall_accuracy": 0.9539282250242483, + "eval_overall_f1": 0.8161073825503355, + "eval_overall_precision": 0.7638190954773869, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9772, + "eval_samples_per_second": 191.358, + "eval_steps_per_second": 3.07, + "step": 4770 + }, + { + "epoch": 46.0, + "grad_norm": 0.9500969648361206, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.0479, + "step": 4876 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7671232876712328, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.691358024691358, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8945686900958466, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8588957055214724, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.15680420398712158, + "eval_overall_accuracy": 0.957807953443259, + "eval_overall_f1": 0.8279569892473119, + "eval_overall_precision": 0.7758186397984886, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.9928, + "eval_samples_per_second": 188.354, + "eval_steps_per_second": 3.022, + "step": 4876 + }, + { + "epoch": 47.0, + "grad_norm": 1.0069383382797241, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0496, + "step": 4982 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.7651006711409397, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7307692307692307, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7402597402597403, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6404494382022472, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.9009584664536742, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8650306748466258, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.16615904867649078, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.8213333333333332, + "eval_overall_precision": 0.7642679900744417, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.9825, + "eval_samples_per_second": 190.334, + "eval_steps_per_second": 3.053, + "step": 4982 + }, + { + "epoch": 48.0, + "grad_norm": 0.7076583504676819, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.0472, + "step": 5088 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.7499999999999999, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6741573033707865, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7755102040816327, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6951219512195121, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.9067524115755627, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8757763975155279, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.896551724137931, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1780806928873062, + "eval_overall_accuracy": 0.9553831231813773, + "eval_overall_f1": 0.8304405874499332, + "eval_overall_precision": 0.7736318407960199, + "eval_overall_recall": 0.8962536023054755, + "eval_runtime": 1.0493, + "eval_samples_per_second": 178.21, + "eval_steps_per_second": 2.859, + "step": 5088 + }, + { + "epoch": 49.0, + "grad_norm": 0.3712306618690491, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.0476, + "step": 5194 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.7564102564102564, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6941176470588235, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.76, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6705882352941176, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.9038461538461539, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8703703703703703, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.912280701754386, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.896551724137931, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.16903701424598694, + "eval_overall_accuracy": 0.9568380213385063, + "eval_overall_f1": 0.8315508021390374, + "eval_overall_precision": 0.7755610972568578, + "eval_overall_recall": 0.8962536023054755, + "eval_runtime": 0.9799, + "eval_samples_per_second": 190.845, + "eval_steps_per_second": 3.062, + "step": 5194 + }, + { + "epoch": 50.0, + "grad_norm": 1.6804779767990112, + "learning_rate": 2.5e-05, + "loss": 0.0456, + "step": 5300 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.76, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6705882352941176, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.9096774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.88125, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.912280701754386, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.896551724137931, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.16871550679206848, + "eval_overall_accuracy": 0.9568380213385063, + "eval_overall_f1": 0.8342245989304813, + "eval_overall_precision": 0.7780548628428927, + "eval_overall_recall": 0.899135446685879, + "eval_runtime": 0.9893, + "eval_samples_per_second": 189.023, + "eval_steps_per_second": 3.032, + "step": 5300 + }, + { + "epoch": 51.0, + "grad_norm": 1.3438061475753784, + "learning_rate": 2.45e-05, + "loss": 0.0422, + "step": 5406 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.7547169811320754, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6818181818181818, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7368421052631579, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6436781609195402, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.9038461538461539, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8703703703703703, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.18318326771259308, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8227513227513227, + "eval_overall_precision": 0.7603911980440098, + "eval_overall_recall": 0.8962536023054755, + "eval_runtime": 0.9678, + "eval_samples_per_second": 193.23, + "eval_steps_per_second": 3.1, + "step": 5406 + }, + { + "epoch": 52.0, + "grad_norm": 0.6827456951141357, + "learning_rate": 2.4e-05, + "loss": 0.0431, + "step": 5512 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.7417218543046357, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7, + "eval_LOCATION_recall": 0.7887323943661971, + "eval_ORGANIZATION_f1": 0.7297297297297298, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6506024096385542, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.9067524115755627, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8757763975155279, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7466666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8524590163934426, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7878787878787878, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.18274861574172974, + "eval_overall_accuracy": 0.9519883608147429, + "eval_overall_f1": 0.8176943699731903, + "eval_overall_precision": 0.7644110275689223, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9802, + "eval_samples_per_second": 190.782, + "eval_steps_per_second": 3.061, + "step": 5512 + }, + { + "epoch": 53.0, + "grad_norm": 1.1168053150177002, + "learning_rate": 2.35e-05, + "loss": 0.0441, + "step": 5618 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.7204968944099378, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6444444444444445, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7225806451612903, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6222222222222222, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8917197452229298, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8536585365853658, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7179487179487178, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6222222222222222, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8524590163934426, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7878787878787878, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.19840683043003082, + "eval_overall_accuracy": 0.9485935984481086, + "eval_overall_f1": 0.8010403120936281, + "eval_overall_precision": 0.7298578199052133, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.9771, + "eval_samples_per_second": 191.384, + "eval_steps_per_second": 3.07, + "step": 5618 + }, + { + "epoch": 54.0, + "grad_norm": 1.451138973236084, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.044, + "step": 5724 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.7466666666666666, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7088607594936709, + "eval_LOCATION_recall": 0.7887323943661971, + "eval_ORGANIZATION_f1": 0.713375796178344, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6086956521739131, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8945686900958466, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8588957055214724, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7837837837837839, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7073170731707317, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.896551724137931, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.17208197712898254, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.8164893617021276, + "eval_overall_precision": 0.7580246913580246, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.9762, + "eval_samples_per_second": 191.562, + "eval_steps_per_second": 3.073, + "step": 5724 + }, + { + "epoch": 55.0, + "grad_norm": 0.4351230263710022, + "learning_rate": 2.25e-05, + "loss": 0.0423, + "step": 5830 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.7417218543046357, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7, + "eval_LOCATION_recall": 0.7887323943661971, + "eval_ORGANIZATION_f1": 0.7450980392156863, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6477272727272727, + "eval_ORGANIZATION_recall": 0.8769230769230769, + "eval_PERSON_f1": 0.9096774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.88125, + "eval_PERSON_recall": 0.94, + "eval_QUANTITY_f1": 0.7012987012987013, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6136363636363636, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.17615394294261932, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8186666666666667, + "eval_overall_precision": 0.7617866004962779, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.989, + "eval_samples_per_second": 189.087, + "eval_steps_per_second": 3.033, + "step": 5830 + }, + { + "epoch": 56.0, + "grad_norm": 1.645548701286316, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0423, + "step": 5936 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.7586206896551724, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7432432432432432, + "eval_LOCATION_recall": 0.7746478873239436, + "eval_ORGANIZATION_f1": 0.7482993197278912, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6707317073170732, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.90032154340836, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8695652173913043, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.18074527382850647, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8233695652173914, + "eval_overall_precision": 0.7789203084832905, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.9759, + "eval_samples_per_second": 191.621, + "eval_steps_per_second": 3.074, + "step": 5936 + }, + { + "epoch": 57.0, + "grad_norm": 0.577072262763977, + "learning_rate": 2.15e-05, + "loss": 0.0403, + "step": 6042 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7222222222222222, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6582278481012658, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8974358974358974, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8641975308641975, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1799338161945343, + "eval_overall_accuracy": 0.954898157129001, + "eval_overall_f1": 0.8189189189189189, + "eval_overall_precision": 0.7709923664122137, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 1.0732, + "eval_samples_per_second": 174.244, + "eval_steps_per_second": 2.795, + "step": 6042 + }, + { + "epoch": 58.0, + "grad_norm": 0.94347083568573, + "learning_rate": 2.1e-05, + "loss": 0.0406, + "step": 6148 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.76056338028169, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7012987012987013, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.17427271604537964, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8276797829036635, + "eval_overall_precision": 0.782051282051282, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9768, + "eval_samples_per_second": 191.446, + "eval_steps_per_second": 3.071, + "step": 6148 + }, + { + "epoch": 59.0, + "grad_norm": 0.4477824568748474, + "learning_rate": 2.05e-05, + "loss": 0.0398, + "step": 6254 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.7388535031847132, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6744186046511628, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7248322147651007, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6428571428571429, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.9061488673139159, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8805031446540881, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.18469205498695374, + "eval_overall_accuracy": 0.9544131910766246, + "eval_overall_f1": 0.8128342245989304, + "eval_overall_precision": 0.7581047381546134, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9789, + "eval_samples_per_second": 191.035, + "eval_steps_per_second": 3.065, + "step": 6254 + }, + { + "epoch": 60.0, + "grad_norm": 2.0811376571655273, + "learning_rate": 2e-05, + "loss": 0.0396, + "step": 6360 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.778523489932886, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7435897435897436, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7417218543046358, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6511627906976745, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8974358974358974, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8641975308641975, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.18184275925159454, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.825268817204301, + "eval_overall_precision": 0.7732997481108312, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.9783, + "eval_samples_per_second": 191.157, + "eval_steps_per_second": 3.067, + "step": 6360 + }, + { + "epoch": 61.0, + "grad_norm": 1.645167589187622, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.0377, + "step": 6466 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7375, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7248322147651007, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6428571428571429, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.90032154340836, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8695652173913043, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7200000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.18943847715854645, + "eval_overall_accuracy": 0.954898157129001, + "eval_overall_f1": 0.8203753351206434, + "eval_overall_precision": 0.7669172932330827, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9731, + "eval_samples_per_second": 192.166, + "eval_steps_per_second": 3.083, + "step": 6466 + }, + { + "epoch": 62.0, + "grad_norm": 2.3521628379821777, + "learning_rate": 1.9e-05, + "loss": 0.0369, + "step": 6572 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.7549668874172185, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7125, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7123287671232877, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6419753086419753, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8910256410256411, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8580246913580247, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1841648370027542, + "eval_overall_accuracy": 0.9541707080504365, + "eval_overall_f1": 0.8129205921938089, + "eval_overall_precision": 0.7626262626262627, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.9689, + "eval_samples_per_second": 193.0, + "eval_steps_per_second": 3.096, + "step": 6572 + }, + { + "epoch": 63.0, + "grad_norm": 0.7499018311500549, + "learning_rate": 1.85e-05, + "loss": 0.0372, + "step": 6678 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.7755102040816326, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7189542483660131, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.912280701754386, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.896551724137931, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.18059784173965454, + "eval_overall_accuracy": 0.9568380213385063, + "eval_overall_f1": 0.8216216216216217, + "eval_overall_precision": 0.7735368956743003, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9745, + "eval_samples_per_second": 191.894, + "eval_steps_per_second": 3.079, + "step": 6678 + }, + { + "epoch": 64.0, + "grad_norm": 0.47230446338653564, + "learning_rate": 1.8e-05, + "loss": 0.0383, + "step": 6784 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.7651006711409397, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7307692307692307, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.713375796178344, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6086956521739131, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.888888888888889, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8484848484848485, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.18837134540081024, + "eval_overall_accuracy": 0.9536857419980601, + "eval_overall_f1": 0.8105960264900663, + "eval_overall_precision": 0.75, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9896, + "eval_samples_per_second": 188.972, + "eval_steps_per_second": 3.032, + "step": 6784 + }, + { + "epoch": 65.0, + "grad_norm": 0.5167880058288574, + "learning_rate": 1.75e-05, + "loss": 0.039, + "step": 6890 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.76, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7215189873417721, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7333333333333334, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6470588235294118, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.90032154340836, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8695652173913043, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1823243796825409, + "eval_overall_accuracy": 0.9546556741028128, + "eval_overall_f1": 0.8187919463087249, + "eval_overall_precision": 0.7663316582914573, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.975, + "eval_samples_per_second": 191.801, + "eval_steps_per_second": 3.077, + "step": 6890 + }, + { + "epoch": 66.0, + "grad_norm": 1.0821930170059204, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0366, + "step": 6996 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.7307692307692307, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6705882352941176, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7297297297297298, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6506024096385542, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.9061488673139159, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8805031446540881, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.19406890869140625, + "eval_overall_accuracy": 0.9536857419980601, + "eval_overall_f1": 0.8112449799196788, + "eval_overall_precision": 0.7575, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 1.0044, + "eval_samples_per_second": 186.175, + "eval_steps_per_second": 2.987, + "step": 6996 + }, + { + "epoch": 67.0, + "grad_norm": 1.0225999355316162, + "learning_rate": 1.65e-05, + "loss": 0.0355, + "step": 7102 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.7866666666666666, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7468354430379747, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7248322147651007, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6428571428571429, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.9025974025974027, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.879746835443038, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1837739795446396, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.8281461434370772, + "eval_overall_precision": 0.7806122448979592, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9686, + "eval_samples_per_second": 193.057, + "eval_steps_per_second": 3.097, + "step": 7102 + }, + { + "epoch": 68.0, + "grad_norm": 1.016437292098999, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0353, + "step": 7208 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.7631578947368421, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7160493827160493, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7412587412587412, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6794871794871795, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1804400384426117, + "eval_overall_accuracy": 0.9575654704170709, + "eval_overall_f1": 0.824966078697422, + "eval_overall_precision": 0.7794871794871795, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9852, + "eval_samples_per_second": 189.806, + "eval_steps_per_second": 3.045, + "step": 7208 + }, + { + "epoch": 69.0, + "grad_norm": 0.6006755828857422, + "learning_rate": 1.55e-05, + "loss": 0.034, + "step": 7314 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.7662337662337662, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7142857142857143, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6179775280898876, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.17799408733844757, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.816, + "eval_overall_precision": 0.7593052109181141, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9698, + "eval_samples_per_second": 192.819, + "eval_steps_per_second": 3.093, + "step": 7314 + }, + { + "epoch": 70.0, + "grad_norm": 0.25187569856643677, + "learning_rate": 1.5e-05, + "loss": 0.0339, + "step": 7420 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.7532467532467533, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6987951807228916, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7333333333333334, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6470588235294118, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.9061488673139159, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8805031446540881, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.19465653598308563, + "eval_overall_accuracy": 0.9551406401551892, + "eval_overall_f1": 0.8214765100671141, + "eval_overall_precision": 0.7688442211055276, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9694, + "eval_samples_per_second": 192.895, + "eval_steps_per_second": 3.095, + "step": 7420 + }, + { + "epoch": 71.0, + "grad_norm": 0.6274309754371643, + "learning_rate": 1.45e-05, + "loss": 0.0336, + "step": 7526 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.7483870967741935, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6904761904761905, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7152317880794703, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.627906976744186, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.9032258064516129, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.875, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.19896024465560913, + "eval_overall_accuracy": 0.9539282250242483, + "eval_overall_f1": 0.816, + "eval_overall_precision": 0.7593052109181141, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9708, + "eval_samples_per_second": 192.631, + "eval_steps_per_second": 3.09, + "step": 7526 + }, + { + "epoch": 72.0, + "grad_norm": 0.8582583665847778, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0335, + "step": 7632 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7361111111111112, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6708860759493671, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8474576271186439, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.18779677152633667, + "eval_overall_accuracy": 0.9558680892337537, + "eval_overall_f1": 0.8211382113821138, + "eval_overall_precision": 0.7749360613810742, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.9722, + "eval_samples_per_second": 192.346, + "eval_steps_per_second": 3.086, + "step": 7632 + }, + { + "epoch": 73.0, + "grad_norm": 0.1207260712981224, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0326, + "step": 7738 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7417218543046358, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6511627906976745, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.9061488673139159, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8805031446540881, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.20128171145915985, + "eval_overall_accuracy": 0.9551406401551892, + "eval_overall_f1": 0.8214765100671141, + "eval_overall_precision": 0.7688442211055276, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9818, + "eval_samples_per_second": 190.471, + "eval_steps_per_second": 3.056, + "step": 7738 + }, + { + "epoch": 74.0, + "grad_norm": 1.50920569896698, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0326, + "step": 7844 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.7733333333333333, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7341772151898734, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7397260273972603, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.90032154340836, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8695652173913043, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8474576271186439, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1905485987663269, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8243243243243243, + "eval_overall_precision": 0.7760814249363868, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9769, + "eval_samples_per_second": 191.427, + "eval_steps_per_second": 3.071, + "step": 7844 + }, + { + "epoch": 75.0, + "grad_norm": 0.6023324131965637, + "learning_rate": 1.25e-05, + "loss": 0.0334, + "step": 7950 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.7564102564102564, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6941176470588235, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7199999999999999, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6352941176470588, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.9032258064516129, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.875, + "eval_PERSON_recall": 0.9333333333333333, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1859501153230667, + "eval_overall_accuracy": 0.9558680892337537, + "eval_overall_f1": 0.8144192256341789, + "eval_overall_precision": 0.7587064676616916, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9744, + "eval_samples_per_second": 191.903, + "eval_steps_per_second": 3.079, + "step": 7950 + }, + { + "epoch": 76.0, + "grad_norm": 0.02606707252562046, + "learning_rate": 1.2e-05, + "loss": 0.0321, + "step": 8056 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.7612903225806451, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7105263157894737, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6206896551724138, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8474576271186439, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1832735538482666, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.8128342245989304, + "eval_overall_precision": 0.7581047381546134, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9766, + "eval_samples_per_second": 191.478, + "eval_steps_per_second": 3.072, + "step": 8056 + }, + { + "epoch": 77.0, + "grad_norm": 4.191733360290527, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0313, + "step": 8162 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.7733333333333333, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7341772151898734, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7310344827586207, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6625, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.18496394157409668, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.8190476190476191, + "eval_overall_precision": 0.7757731958762887, + "eval_overall_recall": 0.8674351585014409, + "eval_runtime": 0.9731, + "eval_samples_per_second": 192.163, + "eval_steps_per_second": 3.083, + "step": 8162 + }, + { + "epoch": 78.0, + "grad_norm": 0.9226277470588684, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0307, + "step": 8268 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7432432432432433, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6626506024096386, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.896551724137931, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1953212469816208, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.8209959623149394, + "eval_overall_precision": 0.7702020202020202, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9836, + "eval_samples_per_second": 190.108, + "eval_steps_per_second": 3.05, + "step": 8268 + }, + { + "epoch": 79.0, + "grad_norm": 0.3072105646133423, + "learning_rate": 1.05e-05, + "loss": 0.03, + "step": 8374 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.7682119205298014, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.725, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7333333333333334, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6470588235294118, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7200000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.2003144919872284, + "eval_overall_accuracy": 0.9539282250242483, + "eval_overall_f1": 0.8161073825503355, + "eval_overall_precision": 0.7638190954773869, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9782, + "eval_samples_per_second": 191.166, + "eval_steps_per_second": 3.067, + "step": 8374 + }, + { + "epoch": 80.0, + "grad_norm": 0.6956435441970825, + "learning_rate": 1e-05, + "loss": 0.0283, + "step": 8480 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.7682119205298014, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.725, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.763888888888889, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6962025316455697, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.9025974025974027, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.879746835443038, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1938561648130417, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.8267394270122782, + "eval_overall_precision": 0.7849740932642487, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.9708, + "eval_samples_per_second": 192.617, + "eval_steps_per_second": 3.09, + "step": 8480 + }, + { + "epoch": 81.0, + "grad_norm": 1.2889044284820557, + "learning_rate": 9.5e-06, + "loss": 0.0313, + "step": 8586 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.76, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7215189873417721, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.19479171931743622, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.8238482384823849, + "eval_overall_precision": 0.7774936061381074, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9964, + "eval_samples_per_second": 187.68, + "eval_steps_per_second": 3.011, + "step": 8586 + }, + { + "epoch": 82.0, + "grad_norm": 0.9364153146743774, + "learning_rate": 9e-06, + "loss": 0.0302, + "step": 8692 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.7612903225806451, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7619047619047619, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6829268292682927, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7200000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.896551724137931, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.2050865739583969, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.8241610738255033, + "eval_overall_precision": 0.7713567839195979, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.9804, + "eval_samples_per_second": 190.74, + "eval_steps_per_second": 3.06, + "step": 8692 + }, + { + "epoch": 83.0, + "grad_norm": 0.5895254611968994, + "learning_rate": 8.500000000000002e-06, + "loss": 0.031, + "step": 8798 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.7866666666666666, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7468354430379747, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6835443037974683, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7027027027027027, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6341463414634146, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.896551724137931, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.19783811271190643, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.8260869565217391, + "eval_overall_precision": 0.781491002570694, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9978, + "eval_samples_per_second": 187.41, + "eval_steps_per_second": 3.007, + "step": 8798 + }, + { + "epoch": 84.0, + "grad_norm": 1.4140515327453613, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0314, + "step": 8904 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.7532467532467533, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6987951807228916, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.20159946382045746, + "eval_overall_accuracy": 0.9558680892337537, + "eval_overall_f1": 0.8172043010752689, + "eval_overall_precision": 0.7657430730478589, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.976, + "eval_samples_per_second": 191.603, + "eval_steps_per_second": 3.074, + "step": 8904 + }, + { + "epoch": 85.0, + "grad_norm": 1.0855952501296997, + "learning_rate": 7.5e-06, + "loss": 0.0302, + "step": 9010 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.7612903225806451, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7123287671232875, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.65, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.20786643028259277, + "eval_overall_accuracy": 0.9536857419980601, + "eval_overall_f1": 0.8172043010752689, + "eval_overall_precision": 0.7657430730478589, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9768, + "eval_samples_per_second": 191.446, + "eval_steps_per_second": 3.071, + "step": 9010 + }, + { + "epoch": 86.0, + "grad_norm": 0.33834901452064514, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0307, + "step": 9116 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.7631578947368421, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7160493827160493, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7552447552447553, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6923076923076923, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8938906752411575, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7027027027027027, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6341463414634146, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.19879013299942017, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8200270635994588, + "eval_overall_precision": 0.7729591836734694, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.9798, + "eval_samples_per_second": 190.857, + "eval_steps_per_second": 3.062, + "step": 9116 + }, + { + "epoch": 87.0, + "grad_norm": 0.3656585216522217, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0269, + "step": 9222 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.7682119205298014, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.725, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7586206896551724, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.2026471644639969, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.8265582655826558, + "eval_overall_precision": 0.7800511508951407, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9677, + "eval_samples_per_second": 193.251, + "eval_steps_per_second": 3.1, + "step": 9222 + }, + { + "epoch": 88.0, + "grad_norm": 0.6935352087020874, + "learning_rate": 6e-06, + "loss": 0.0287, + "step": 9328 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7586206896551724, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.201119527220726, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.8259109311740891, + "eval_overall_precision": 0.7766497461928934, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 1.0234, + "eval_samples_per_second": 182.724, + "eval_steps_per_second": 2.931, + "step": 9328 + }, + { + "epoch": 89.0, + "grad_norm": 0.4853912591934204, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0288, + "step": 9434 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.7682119205298014, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.725, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7534246575342466, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6790123456790124, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.2025364488363266, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.8243243243243243, + "eval_overall_precision": 0.7760814249363868, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9778, + "eval_samples_per_second": 191.251, + "eval_steps_per_second": 3.068, + "step": 9434 + }, + { + "epoch": 90.0, + "grad_norm": 0.4491923749446869, + "learning_rate": 5e-06, + "loss": 0.0285, + "step": 9540 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7517730496453899, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6973684210526315, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.20170889794826508, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.8222523744911805, + "eval_overall_precision": 0.7769230769230769, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.9825, + "eval_samples_per_second": 190.328, + "eval_steps_per_second": 3.053, + "step": 9540 + }, + { + "epoch": 91.0, + "grad_norm": 0.47789445519447327, + "learning_rate": 4.5e-06, + "loss": 0.0281, + "step": 9646 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7432432432432433, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6626506024096386, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1998133510351181, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.8236877523553162, + "eval_overall_precision": 0.7727272727272727, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9821, + "eval_samples_per_second": 190.408, + "eval_steps_per_second": 3.055, + "step": 9646 + }, + { + "epoch": 92.0, + "grad_norm": 0.6101423501968384, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0276, + "step": 9752 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.7733333333333333, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7341772151898734, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7659574468085107, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.7105263157894737, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.19945763051509857, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.8310626702997276, + "eval_overall_precision": 0.7881136950904393, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9655, + "eval_samples_per_second": 193.681, + "eval_steps_per_second": 3.107, + "step": 9752 + }, + { + "epoch": 93.0, + "grad_norm": 0.3276668190956116, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0275, + "step": 9858 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.7631578947368421, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7160493827160493, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.763888888888889, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6962025316455697, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1999932825565338, + "eval_overall_accuracy": 0.9573229873908826, + "eval_overall_f1": 0.830393487109905, + "eval_overall_precision": 0.7846153846153846, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.9774, + "eval_samples_per_second": 191.321, + "eval_steps_per_second": 3.069, + "step": 9858 + }, + { + "epoch": 94.0, + "grad_norm": 0.6009638905525208, + "learning_rate": 3e-06, + "loss": 0.0271, + "step": 9964 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.7631578947368421, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7160493827160493, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.19967904686927795, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.8227334235453315, + "eval_overall_precision": 0.7755102040816326, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.9866, + "eval_samples_per_second": 189.542, + "eval_steps_per_second": 3.041, + "step": 9964 + }, + { + "epoch": 95.0, + "grad_norm": 0.982728123664856, + "learning_rate": 2.5e-06, + "loss": 0.0271, + "step": 10070 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6835443037974683, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.20181794464588165, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.8243243243243243, + "eval_overall_precision": 0.7760814249363868, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9779, + "eval_samples_per_second": 191.22, + "eval_steps_per_second": 3.068, + "step": 10070 + }, + { + "epoch": 96.0, + "grad_norm": 0.6564426422119141, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0268, + "step": 10176 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7073170731707317, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7586206896551724, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.20548370480537415, + "eval_overall_accuracy": 0.9558680892337537, + "eval_overall_f1": 0.8232118758434548, + "eval_overall_precision": 0.7741116751269036, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9817, + "eval_samples_per_second": 190.495, + "eval_steps_per_second": 3.056, + "step": 10176 + }, + { + "epoch": 97.0, + "grad_norm": 0.6329734921455383, + "learning_rate": 1.5e-06, + "loss": 0.0279, + "step": 10282 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.7631578947368421, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7160493827160493, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6835443037974683, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.20235015451908112, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.8238482384823849, + "eval_overall_precision": 0.7774936061381074, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 1.0426, + "eval_samples_per_second": 179.365, + "eval_steps_per_second": 2.878, + "step": 10282 + }, + { + "epoch": 98.0, + "grad_norm": 0.46473032236099243, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0267, + "step": 10388 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.7631578947368421, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7160493827160493, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7482993197278912, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6707317073170732, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7297297297297297, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6585365853658537, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.2065351903438568, + "eval_overall_accuracy": 0.9558680892337537, + "eval_overall_f1": 0.8232118758434548, + "eval_overall_precision": 0.7741116751269036, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9772, + "eval_samples_per_second": 191.37, + "eval_steps_per_second": 3.07, + "step": 10388 + }, + { + "epoch": 99.0, + "grad_norm": 1.0716484785079956, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0273, + "step": 10494 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.7631578947368421, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7160493827160493, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.20370545983314514, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.8254397834912043, + "eval_overall_precision": 0.7780612244897959, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9787, + "eval_samples_per_second": 191.069, + "eval_steps_per_second": 3.065, + "step": 10494 + }, + { + "epoch": 100.0, + "grad_norm": 0.5122187733650208, + "learning_rate": 0.0, + "loss": 0.0257, + "step": 10600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.7682119205298014, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.725, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.7448275862068966, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.675, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8996763754045306, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8742138364779874, + "eval_PERSON_recall": 0.9266666666666666, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.20310480892658234, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.8265582655826558, + "eval_overall_precision": 0.7800511508951407, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.9728, + "eval_samples_per_second": 192.231, + "eval_steps_per_second": 3.084, + "step": 10600 + }, + { + "epoch": 100.0, + "step": 10600, + "total_flos": 5123217774191952.0, + "train_loss": 0.07355985648227188, + "train_runtime": 1346.3141, + "train_samples_per_second": 125.379, + "train_steps_per_second": 7.873 + } + ], + "logging_steps": 500, + "max_steps": 10600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 5123217774191952.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}