diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3930 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 10600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 1.289084553718567, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.9293, + "step": 106 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.012738853503184716, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.25, + "eval_PERSON_recall": 0.006535947712418301, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.0, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.0, + "eval_TIME_recall": 0.0, + "eval_loss": 0.6359996199607849, + "eval_overall_accuracy": 0.8357691354966559, + "eval_overall_f1": 0.005698005698005698, + "eval_overall_precision": 0.2, + "eval_overall_recall": 0.002890173410404624, + "eval_runtime": 0.3296, + "eval_samples_per_second": 567.378, + "eval_steps_per_second": 9.102, + "step": 106 + }, + { + "epoch": 2.0, + "grad_norm": 1.5572154521942139, + "learning_rate": 4.9e-05, + "loss": 0.5712, + "step": 212 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.22471910112359553, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.5, + "eval_LOCATION_recall": 0.14492753623188406, + "eval_ORGANIZATION_f1": 0.046511627906976744, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.08, + "eval_ORGANIZATION_recall": 0.03278688524590164, + "eval_PERSON_f1": 0.45, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.4311377245508982, + "eval_PERSON_recall": 0.47058823529411764, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.5384615384615384, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.6666666666666666, + "eval_TIME_recall": 0.45161290322580644, + "eval_loss": 0.43386390805244446, + "eval_overall_accuracy": 0.8806044092147635, + "eval_overall_f1": 0.33851468048359246, + "eval_overall_precision": 0.4206008583690987, + "eval_overall_recall": 0.2832369942196532, + "eval_runtime": 0.3244, + "eval_samples_per_second": 576.457, + "eval_steps_per_second": 9.248, + "step": 212 + }, + { + "epoch": 3.0, + "grad_norm": 1.6938573122024536, + "learning_rate": 4.85e-05, + "loss": 0.4084, + "step": 318 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.5405405405405406, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.5063291139240507, + "eval_LOCATION_recall": 0.5797101449275363, + "eval_ORGANIZATION_f1": 0.29059829059829057, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.30357142857142855, + "eval_ORGANIZATION_recall": 0.2786885245901639, + "eval_PERSON_f1": 0.6467391304347827, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.5534883720930233, + "eval_PERSON_recall": 0.7777777777777778, + "eval_QUANTITY_f1": 0.22580645161290322, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.23333333333333334, + "eval_QUANTITY_recall": 0.21875, + "eval_TIME_f1": 0.6875, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.6666666666666666, + "eval_TIME_recall": 0.7096774193548387, + "eval_loss": 0.3173196017742157, + "eval_overall_accuracy": 0.9165221699281645, + "eval_overall_f1": 0.5401844532279314, + "eval_overall_precision": 0.4963680387409201, + "eval_overall_recall": 0.5924855491329479, + "eval_runtime": 0.3294, + "eval_samples_per_second": 567.711, + "eval_steps_per_second": 9.108, + "step": 318 + }, + { + "epoch": 4.0, + "grad_norm": 3.1388819217681885, + "learning_rate": 4.8e-05, + "loss": 0.306, + "step": 424 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.6143790849673202, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.5595238095238095, + "eval_LOCATION_recall": 0.6811594202898551, + "eval_ORGANIZATION_f1": 0.4444444444444445, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.4642857142857143, + "eval_ORGANIZATION_recall": 0.4262295081967213, + "eval_PERSON_f1": 0.7669616519174042, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.6989247311827957, + "eval_PERSON_recall": 0.8496732026143791, + "eval_QUANTITY_f1": 0.6086956521739131, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.5675675675675675, + "eval_QUANTITY_recall": 0.65625, + "eval_TIME_f1": 0.8064516129032258, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8064516129032258, + "eval_loss": 0.22655707597732544, + "eval_overall_accuracy": 0.9370819915779044, + "eval_overall_f1": 0.672972972972973, + "eval_overall_precision": 0.631979695431472, + "eval_overall_recall": 0.7196531791907514, + "eval_runtime": 0.3329, + "eval_samples_per_second": 561.669, + "eval_steps_per_second": 9.011, + "step": 424 + }, + { + "epoch": 5.0, + "grad_norm": 2.1097211837768555, + "learning_rate": 4.75e-05, + "loss": 0.2369, + "step": 530 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.6285714285714286, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.5188679245283019, + "eval_LOCATION_recall": 0.7971014492753623, + "eval_ORGANIZATION_f1": 0.5442176870748299, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.46511627906976744, + "eval_ORGANIZATION_recall": 0.6557377049180327, + "eval_PERSON_f1": 0.7930029154518949, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.7157894736842105, + "eval_PERSON_recall": 0.8888888888888888, + "eval_QUANTITY_f1": 0.5499999999999999, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.4583333333333333, + "eval_QUANTITY_recall": 0.6875, + "eval_TIME_f1": 0.8125, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7878787878787878, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.21178996562957764, + "eval_overall_accuracy": 0.9380728263562051, + "eval_overall_f1": 0.6897404202719406, + "eval_overall_precision": 0.6025917926565875, + "eval_overall_recall": 0.8063583815028902, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.65, + "eval_steps_per_second": 9.026, + "step": 530 + }, + { + "epoch": 6.0, + "grad_norm": 1.1763736009597778, + "learning_rate": 4.7e-05, + "loss": 0.1993, + "step": 636 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.7037037037037037, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6129032258064516, + "eval_LOCATION_recall": 0.8260869565217391, + "eval_ORGANIZATION_f1": 0.6153846153846154, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.5365853658536586, + "eval_ORGANIZATION_recall": 0.7213114754098361, + "eval_PERSON_f1": 0.8195718654434252, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.7701149425287356, + "eval_PERSON_recall": 0.8758169934640523, + "eval_QUANTITY_f1": 0.5753424657534246, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.5121951219512195, + "eval_QUANTITY_recall": 0.65625, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.17126347124576569, + "eval_overall_accuracy": 0.9482288828337875, + "eval_overall_f1": 0.7369791666666667, + "eval_overall_precision": 0.6706161137440758, + "eval_overall_recall": 0.8179190751445087, + "eval_runtime": 0.3356, + "eval_samples_per_second": 557.292, + "eval_steps_per_second": 8.941, + "step": 636 + }, + { + "epoch": 7.0, + "grad_norm": 1.0250859260559082, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.1745, + "step": 742 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.7466666666666666, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.691358024691358, + "eval_LOCATION_recall": 0.8115942028985508, + "eval_ORGANIZATION_f1": 0.6122448979591837, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.5232558139534884, + "eval_ORGANIZATION_recall": 0.7377049180327869, + "eval_PERSON_f1": 0.8385093167701864, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.7988165680473372, + "eval_PERSON_recall": 0.8823529411764706, + "eval_QUANTITY_f1": 0.6216216216216217, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.5476190476190477, + "eval_QUANTITY_recall": 0.71875, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15598997473716736, + "eval_overall_accuracy": 0.9512013871686896, + "eval_overall_f1": 0.7566137566137565, + "eval_overall_precision": 0.697560975609756, + "eval_overall_recall": 0.8265895953757225, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.394, + "eval_steps_per_second": 9.022, + "step": 742 + }, + { + "epoch": 8.0, + "grad_norm": 0.9757563471794128, + "learning_rate": 4.600000000000001e-05, + "loss": 0.158, + "step": 848 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6344827586206897, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.5476190476190477, + "eval_ORGANIZATION_recall": 0.7540983606557377, + "eval_PERSON_f1": 0.8411214953271029, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8035714285714286, + "eval_PERSON_recall": 0.8823529411764706, + "eval_QUANTITY_f1": 0.6301369863013699, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.5609756097560976, + "eval_QUANTITY_recall": 0.71875, + "eval_TIME_f1": 0.7536231884057972, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.6842105263157895, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.15038101375102997, + "eval_overall_accuracy": 0.9521922219469904, + "eval_overall_f1": 0.7595269382391591, + "eval_overall_precision": 0.6963855421686747, + "eval_overall_recall": 0.8352601156069365, + "eval_runtime": 0.3277, + "eval_samples_per_second": 570.677, + "eval_steps_per_second": 9.155, + "step": 848 + }, + { + "epoch": 9.0, + "grad_norm": 1.060363531112671, + "learning_rate": 4.55e-05, + "loss": 0.1464, + "step": 954 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.7692307692307693, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6896551724137931, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.6153846153846154, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.5365853658536586, + "eval_ORGANIZATION_recall": 0.7213114754098361, + "eval_PERSON_f1": 0.8490566037735848, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.8823529411764706, + "eval_QUANTITY_f1": 0.6666666666666665, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.75, + "eval_TIME_f1": 0.787878787878788, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7428571428571429, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.14481940865516663, + "eval_overall_accuracy": 0.9529353480307159, + "eval_overall_f1": 0.7655629139072848, + "eval_overall_precision": 0.706601466992665, + "eval_overall_recall": 0.8352601156069365, + "eval_runtime": 0.3329, + "eval_samples_per_second": 561.801, + "eval_steps_per_second": 9.013, + "step": 954 + }, + { + "epoch": 10.0, + "grad_norm": 0.9707283973693848, + "learning_rate": 4.5e-05, + "loss": 0.14, + "step": 1060 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.786206896551724, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8260869565217391, + "eval_ORGANIZATION_f1": 0.6222222222222222, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.5675675675675675, + "eval_ORGANIZATION_recall": 0.6885245901639344, + "eval_PERSON_f1": 0.8444444444444443, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8209876543209876, + "eval_PERSON_recall": 0.869281045751634, + "eval_QUANTITY_f1": 0.7246376811594203, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.8059701492537312, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.13176114857196808, + "eval_overall_accuracy": 0.9583849393113698, + "eval_overall_f1": 0.7770177838577291, + "eval_overall_precision": 0.7376623376623377, + "eval_overall_recall": 0.8208092485549133, + "eval_runtime": 0.3277, + "eval_samples_per_second": 570.689, + "eval_steps_per_second": 9.155, + "step": 1060 + }, + { + "epoch": 11.0, + "grad_norm": 0.8272536396980286, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.1336, + "step": 1166 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.7662337662337662, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6941176470588235, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6259541984732824, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.5857142857142857, + "eval_ORGANIZATION_recall": 0.6721311475409836, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8888888888888888, + "eval_QUANTITY_f1": 0.6575342465753423, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.5853658536585366, + "eval_QUANTITY_recall": 0.75, + "eval_TIME_f1": 0.7941176470588235, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7297297297297297, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.13256430625915527, + "eval_overall_accuracy": 0.955907852365618, + "eval_overall_f1": 0.7735849056603774, + "eval_overall_precision": 0.7247474747474747, + "eval_overall_recall": 0.8294797687861272, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.93, + "eval_steps_per_second": 9.031, + "step": 1166 + }, + { + "epoch": 12.0, + "grad_norm": 2.3991382122039795, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.126, + "step": 1272 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.7894736842105263, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7228915662650602, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.6616541353383459, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6111111111111112, + "eval_ORGANIZATION_recall": 0.7213114754098361, + "eval_PERSON_f1": 0.8695652173913043, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8284023668639053, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.6486486486486486, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.5714285714285714, + "eval_QUANTITY_recall": 0.75, + "eval_TIME_f1": 0.8059701492537312, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.13229668140411377, + "eval_overall_accuracy": 0.9568986871439188, + "eval_overall_f1": 0.7887700534759359, + "eval_overall_precision": 0.7338308457711443, + "eval_overall_recall": 0.8526011560693642, + "eval_runtime": 0.3344, + "eval_samples_per_second": 559.23, + "eval_steps_per_second": 8.972, + "step": 1272 + }, + { + "epoch": 13.0, + "grad_norm": 0.8172109723091125, + "learning_rate": 4.35e-05, + "loss": 0.1209, + "step": 1378 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.7947019867549668, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7317073170731707, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.6808510638297871, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6, + "eval_ORGANIZATION_recall": 0.7868852459016393, + "eval_PERSON_f1": 0.8687500000000001, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8323353293413174, + "eval_PERSON_recall": 0.9084967320261438, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.12979264557361603, + "eval_overall_accuracy": 0.9601189001733961, + "eval_overall_f1": 0.8026845637583893, + "eval_overall_precision": 0.7493734335839599, + "eval_overall_recall": 0.8641618497109826, + "eval_runtime": 0.3357, + "eval_samples_per_second": 557.123, + "eval_steps_per_second": 8.938, + "step": 1378 + }, + { + "epoch": 14.0, + "grad_norm": 1.386945366859436, + "learning_rate": 4.3e-05, + "loss": 0.1172, + "step": 1484 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6883116883116883, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.5698924731182796, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8650306748466258, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.815028901734104, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.6486486486486486, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.5714285714285714, + "eval_QUANTITY_recall": 0.75, + "eval_TIME_f1": 0.7536231884057972, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.6842105263157895, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.14491254091262817, + "eval_overall_accuracy": 0.9529353480307159, + "eval_overall_f1": 0.7839586028460543, + "eval_overall_precision": 0.7096018735362998, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.3283, + "eval_samples_per_second": 569.521, + "eval_steps_per_second": 9.137, + "step": 1484 + }, + { + "epoch": 15.0, + "grad_norm": 0.46282199025154114, + "learning_rate": 4.25e-05, + "loss": 0.1135, + "step": 1590 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.7999999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7407407407407407, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.6969696969696969, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.647887323943662, + "eval_ORGANIZATION_recall": 0.7540983606557377, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8888888888888888, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.12623363733291626, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8032786885245903, + "eval_overall_precision": 0.7616580310880829, + "eval_overall_recall": 0.8497109826589595, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.012, + "eval_steps_per_second": 9.016, + "step": 1590 + }, + { + "epoch": 16.0, + "grad_norm": 0.5779574513435364, + "learning_rate": 4.2e-05, + "loss": 0.1087, + "step": 1696 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.8108108108108109, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.759493670886076, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.620253164556962, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.8624999999999999, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8263473053892215, + "eval_PERSON_recall": 0.9019607843137255, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1268586367368698, + "eval_overall_accuracy": 0.9611097349516968, + "eval_overall_f1": 0.8070175438596492, + "eval_overall_precision": 0.7569620253164557, + "eval_overall_recall": 0.8641618497109826, + "eval_runtime": 0.326, + "eval_samples_per_second": 573.612, + "eval_steps_per_second": 9.202, + "step": 1696 + }, + { + "epoch": 17.0, + "grad_norm": 0.2563510537147522, + "learning_rate": 4.15e-05, + "loss": 0.1041, + "step": 1802 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.7999999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7407407407407407, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7375886524822695, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.65, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.8652037617554859, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8313253012048193, + "eval_PERSON_recall": 0.9019607843137255, + "eval_QUANTITY_f1": 0.7027027027027026, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6190476190476191, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8181818181818182, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7714285714285715, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.13557660579681396, + "eval_overall_accuracy": 0.9583849393113698, + "eval_overall_f1": 0.8079999999999999, + "eval_overall_precision": 0.75, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.3329, + "eval_samples_per_second": 561.696, + "eval_steps_per_second": 9.011, + "step": 1802 + }, + { + "epoch": 18.0, + "grad_norm": 2.330004930496216, + "learning_rate": 4.1e-05, + "loss": 0.102, + "step": 1908 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.8053691275167786, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.6818181818181818, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6338028169014085, + "eval_ORGANIZATION_recall": 0.7377049180327869, + "eval_PERSON_f1": 0.8643533123028391, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.8954248366013072, + "eval_QUANTITY_f1": 0.7246376811594203, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.787878787878788, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7428571428571429, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.128007709980011, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.7994542974079127, + "eval_overall_precision": 0.7571059431524548, + "eval_overall_recall": 0.846820809248555, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.934, + "eval_steps_per_second": 9.031, + "step": 1908 + }, + { + "epoch": 19.0, + "grad_norm": 1.4404404163360596, + "learning_rate": 4.05e-05, + "loss": 0.0999, + "step": 2014 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.8299319727891156, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.782051282051282, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6715328467153285, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6052631578947368, + "eval_ORGANIZATION_recall": 0.7540983606557377, + "eval_PERSON_f1": 0.8706624605678233, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.9019607843137255, + "eval_QUANTITY_f1": 0.6857142857142857, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.75, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.12171030044555664, + "eval_overall_accuracy": 0.9623482784245727, + "eval_overall_f1": 0.8043478260869564, + "eval_overall_precision": 0.7589743589743589, + "eval_overall_recall": 0.8554913294797688, + "eval_runtime": 0.3267, + "eval_samples_per_second": 572.375, + "eval_steps_per_second": 9.182, + "step": 2014 + }, + { + "epoch": 20.0, + "grad_norm": 0.6656851768493652, + "learning_rate": 4e-05, + "loss": 0.0942, + "step": 2120 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.7894736842105263, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7228915662650602, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7391304347826089, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6623376623376623, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8757763975155279, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.834319526627219, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7222222222222223, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.65, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.782608695652174, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7105263157894737, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.13243292272090912, + "eval_overall_accuracy": 0.9603666088679713, + "eval_overall_f1": 0.8100929614873839, + "eval_overall_precision": 0.7493857493857494, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.253, + "eval_steps_per_second": 9.02, + "step": 2120 + }, + { + "epoch": 21.0, + "grad_norm": 0.6464937329292297, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.0932, + "step": 2226 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.7770700636942676, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6931818181818182, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7361111111111112, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6385542168674698, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8730650154798762, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8294117647058824, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.6944444444444444, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.625, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.7647058823529411, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7027027027027027, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.14281630516052246, + "eval_overall_accuracy": 0.9564032697547684, + "eval_overall_f1": 0.8010471204188482, + "eval_overall_precision": 0.7320574162679426, + "eval_overall_recall": 0.884393063583815, + "eval_runtime": 0.3327, + "eval_samples_per_second": 561.987, + "eval_steps_per_second": 9.016, + "step": 2226 + }, + { + "epoch": 22.0, + "grad_norm": 1.1358213424682617, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.0916, + "step": 2332 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.8108108108108109, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.759493670886076, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7445255474452555, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6710526315789473, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8785046728971961, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8392857142857143, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.12807393074035645, + "eval_overall_accuracy": 0.9628436958137231, + "eval_overall_f1": 0.8200270635994588, + "eval_overall_precision": 0.7709923664122137, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.083, + "eval_steps_per_second": 9.017, + "step": 2332 + }, + { + "epoch": 23.0, + "grad_norm": 0.7238495945930481, + "learning_rate": 3.85e-05, + "loss": 0.0892, + "step": 2438 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.8163265306122449, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7692307692307693, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7445255474452555, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6710526315789473, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7323943661971831, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.1260461062192917, + "eval_overall_accuracy": 0.9628436958137231, + "eval_overall_f1": 0.8216216216216216, + "eval_overall_precision": 0.7715736040609137, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.346, + "eval_steps_per_second": 9.022, + "step": 2438 + }, + { + "epoch": 24.0, + "grad_norm": 1.773553490638733, + "learning_rate": 3.8e-05, + "loss": 0.0865, + "step": 2544 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.7692307692307693, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6896551724137931, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7412587412587412, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6463414634146342, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7323943661971831, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.7647058823529411, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7027027027027027, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.14333529770374298, + "eval_overall_accuracy": 0.9578895219222194, + "eval_overall_f1": 0.8073878627968338, + "eval_overall_precision": 0.7427184466019418, + "eval_overall_recall": 0.884393063583815, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.546, + "eval_steps_per_second": 9.009, + "step": 2544 + }, + { + "epoch": 25.0, + "grad_norm": 1.2657068967819214, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0834, + "step": 2650 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.8108108108108109, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.759493670886076, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7746478873239437, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6790123456790124, + "eval_ORGANIZATION_recall": 0.9016393442622951, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7222222222222223, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.65, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.787878787878788, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7428571428571429, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13225042819976807, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.8246318607764391, + "eval_overall_precision": 0.7680798004987531, + "eval_overall_recall": 0.8901734104046243, + "eval_runtime": 0.3265, + "eval_samples_per_second": 572.661, + "eval_steps_per_second": 9.187, + "step": 2650 + }, + { + "epoch": 26.0, + "grad_norm": 1.338986873626709, + "learning_rate": 3.7e-05, + "loss": 0.0823, + "step": 2756 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.8108108108108109, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.759493670886076, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7638888888888888, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6626506024096386, + "eval_ORGANIZATION_recall": 0.9016393442622951, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7605633802816902, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13243231177330017, + "eval_overall_accuracy": 0.9616051523408472, + "eval_overall_f1": 0.8273092369477911, + "eval_overall_precision": 0.770573566084788, + "eval_overall_recall": 0.8930635838150289, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.211, + "eval_steps_per_second": 9.035, + "step": 2756 + }, + { + "epoch": 27.0, + "grad_norm": 1.9946244955062866, + "learning_rate": 3.65e-05, + "loss": 0.0809, + "step": 2862 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.8053691275167786, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7391304347826089, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6623376623376623, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8832807570977917, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8536585365853658, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7605633802816902, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13205386698246002, + "eval_overall_accuracy": 0.9616051523408472, + "eval_overall_f1": 0.8216216216216216, + "eval_overall_precision": 0.7715736040609137, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3268, + "eval_samples_per_second": 572.171, + "eval_steps_per_second": 9.179, + "step": 2862 + }, + { + "epoch": 28.0, + "grad_norm": 1.337754487991333, + "learning_rate": 3.6e-05, + "loss": 0.0793, + "step": 2968 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.8053691275167786, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.6766917293233082, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.7377049180327869, + "eval_PERSON_f1": 0.8867924528301887, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8545454545454545, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.742857142857143, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6842105263157895, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.787878787878788, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7428571428571429, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.1275886595249176, + "eval_overall_accuracy": 0.9618528610354223, + "eval_overall_f1": 0.809782608695652, + "eval_overall_precision": 0.764102564102564, + "eval_overall_recall": 0.861271676300578, + "eval_runtime": 0.3333, + "eval_samples_per_second": 561.108, + "eval_steps_per_second": 9.002, + "step": 2968 + }, + { + "epoch": 29.0, + "grad_norm": 1.4286695718765259, + "learning_rate": 3.55e-05, + "loss": 0.0759, + "step": 3074 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.8053691275167786, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7428571428571428, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6582278481012658, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.880503144654088, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8484848484848485, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7536231884057971, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7027027027027027, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13053637742996216, + "eval_overall_accuracy": 0.9616051523408472, + "eval_overall_f1": 0.8205128205128205, + "eval_overall_precision": 0.769620253164557, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3332, + "eval_samples_per_second": 561.235, + "eval_steps_per_second": 9.004, + "step": 3074 + }, + { + "epoch": 30.0, + "grad_norm": 1.020611047744751, + "learning_rate": 3.5e-05, + "loss": 0.0742, + "step": 3180 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.7999999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7407407407407407, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.726027397260274, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6235294117647059, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7164179104477612, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.75, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13522376120090485, + "eval_overall_accuracy": 0.9596234827842457, + "eval_overall_f1": 0.8139223560910309, + "eval_overall_precision": 0.7581047381546134, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.334, + "eval_samples_per_second": 559.963, + "eval_steps_per_second": 8.983, + "step": 3180 + }, + { + "epoch": 31.0, + "grad_norm": 0.5647078156471252, + "learning_rate": 3.45e-05, + "loss": 0.0755, + "step": 3286 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7014925373134328, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6438356164383562, + "eval_ORGANIZATION_recall": 0.7704918032786885, + "eval_PERSON_f1": 0.8777429467084639, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8433734939759037, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.12345639616250992, + "eval_overall_accuracy": 0.9628436958137231, + "eval_overall_f1": 0.8125000000000001, + "eval_overall_precision": 0.7666666666666667, + "eval_overall_recall": 0.8641618497109826, + "eval_runtime": 0.326, + "eval_samples_per_second": 573.599, + "eval_steps_per_second": 9.202, + "step": 3286 + }, + { + "epoch": 32.0, + "grad_norm": 0.8619909286499023, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.0709, + "step": 3392 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.7919463087248322, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7375, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6506024096385542, + "eval_ORGANIZATION_recall": 0.8852459016393442, + "eval_PERSON_f1": 0.8832807570977917, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8536585365853658, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.126638263463974, + "eval_overall_accuracy": 0.9628436958137231, + "eval_overall_f1": 0.8209959623149394, + "eval_overall_precision": 0.7682619647355163, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3268, + "eval_samples_per_second": 572.145, + "eval_steps_per_second": 9.179, + "step": 3392 + }, + { + "epoch": 33.0, + "grad_norm": 3.400688886642456, + "learning_rate": 3.35e-05, + "loss": 0.0711, + "step": 3498 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.8163265306122449, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7692307692307693, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.762589928057554, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6794871794871795, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8832807570977917, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8536585365853658, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.8125, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7878787878787878, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.12633301317691803, + "eval_overall_accuracy": 0.9645776566757494, + "eval_overall_f1": 0.8272108843537415, + "eval_overall_precision": 0.781491002570694, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3276, + "eval_samples_per_second": 570.79, + "eval_steps_per_second": 9.157, + "step": 3498 + }, + { + "epoch": 34.0, + "grad_norm": 0.6063269376754761, + "learning_rate": 3.3e-05, + "loss": 0.0698, + "step": 3604 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.7947019867549668, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7317073170731707, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.726027397260274, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6235294117647059, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8875000000000001, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8502994011976048, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7222222222222223, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.65, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.787878787878788, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7428571428571429, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13981756567955017, + "eval_overall_accuracy": 0.9591280653950953, + "eval_overall_f1": 0.8132450331125827, + "eval_overall_precision": 0.7506112469437652, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.139, + "eval_steps_per_second": 9.018, + "step": 3604 + }, + { + "epoch": 35.0, + "grad_norm": 2.01218843460083, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0669, + "step": 3710 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6987951807228916, + "eval_LOCATION_recall": 0.8405797101449275, + "eval_ORGANIZATION_f1": 0.7248322147651006, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6136363636363636, + "eval_ORGANIZATION_recall": 0.8852459016393442, + "eval_PERSON_f1": 0.8895899053627759, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8597560975609756, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7323943661971831, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8125, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7878787878787878, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.1391025334596634, + "eval_overall_accuracy": 0.958632648005945, + "eval_overall_f1": 0.8100929614873839, + "eval_overall_precision": 0.7493857493857494, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.911, + "eval_steps_per_second": 9.015, + "step": 3710 + }, + { + "epoch": 36.0, + "grad_norm": 0.48844727873802185, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0659, + "step": 3816 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7310344827586208, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6309523809523809, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8930817610062893, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8606060606060606, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13162988424301147, + "eval_overall_accuracy": 0.9623482784245727, + "eval_overall_f1": 0.8192771084337348, + "eval_overall_precision": 0.7630922693266833, + "eval_overall_recall": 0.884393063583815, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.896, + "eval_steps_per_second": 9.014, + "step": 3816 + }, + { + "epoch": 37.0, + "grad_norm": 1.3096176385879517, + "learning_rate": 3.15e-05, + "loss": 0.0627, + "step": 3922 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7297297297297297, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6206896551724138, + "eval_ORGANIZATION_recall": 0.8852459016393442, + "eval_PERSON_f1": 0.8895899053627759, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8597560975609756, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7536231884057971, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7027027027027027, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.13473619520664215, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8186666666666668, + "eval_overall_precision": 0.7599009900990099, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3342, + "eval_samples_per_second": 559.531, + "eval_steps_per_second": 8.976, + "step": 3922 + }, + { + "epoch": 38.0, + "grad_norm": 0.9041996598243713, + "learning_rate": 3.1e-05, + "loss": 0.0627, + "step": 4028 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.8133333333333332, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7530864197530864, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7412587412587412, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6463414634146342, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.742857142857143, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6842105263157895, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13561618328094482, + "eval_overall_accuracy": 0.9621005697299975, + "eval_overall_f1": 0.8219544846050869, + "eval_overall_precision": 0.7655860349127181, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.038, + "eval_steps_per_second": 9.017, + "step": 4028 + }, + { + "epoch": 39.0, + "grad_norm": 0.9759089350700378, + "learning_rate": 3.05e-05, + "loss": 0.0592, + "step": 4134 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.8299319727891156, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.782051282051282, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7246376811594203, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6493506493506493, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.880503144654088, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8484848484848485, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1278260350227356, + "eval_overall_accuracy": 0.964082239286599, + "eval_overall_f1": 0.8283378746594006, + "eval_overall_precision": 0.7835051546391752, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3329, + "eval_samples_per_second": 561.813, + "eval_steps_per_second": 9.013, + "step": 4134 + }, + { + "epoch": 40.0, + "grad_norm": 0.8231136798858643, + "learning_rate": 3e-05, + "loss": 0.06, + "step": 4240 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.7581699346405228, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6904761904761905, + "eval_LOCATION_recall": 0.8405797101449275, + "eval_ORGANIZATION_f1": 0.7375886524822695, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.65, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.880503144654088, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8484848484848485, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7323943661971831, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8125, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7878787878787878, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.13648688793182373, + "eval_overall_accuracy": 0.9598711914788209, + "eval_overall_f1": 0.8085676037483266, + "eval_overall_precision": 0.7531172069825436, + "eval_overall_recall": 0.8728323699421965, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.148, + "eval_steps_per_second": 9.018, + "step": 4240 + }, + { + "epoch": 41.0, + "grad_norm": 0.8856554627418518, + "learning_rate": 2.95e-05, + "loss": 0.0583, + "step": 4346 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7586206896551725, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6547619047619048, + "eval_ORGANIZATION_recall": 0.9016393442622951, + "eval_PERSON_f1": 0.8819875776397514, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8402366863905325, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8125, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7878787878787878, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.1364709734916687, + "eval_overall_accuracy": 0.9618528610354223, + "eval_overall_f1": 0.822429906542056, + "eval_overall_precision": 0.7642679900744417, + "eval_overall_recall": 0.8901734104046243, + "eval_runtime": 0.3319, + "eval_samples_per_second": 563.438, + "eval_steps_per_second": 9.039, + "step": 4346 + }, + { + "epoch": 42.0, + "grad_norm": 0.7966371178627014, + "learning_rate": 2.9e-05, + "loss": 0.0592, + "step": 4452 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.8053691275167786, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7482993197278911, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6395348837209303, + "eval_ORGANIZATION_recall": 0.9016393442622951, + "eval_PERSON_f1": 0.896551724137931, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8614457831325302, + "eval_PERSON_recall": 0.934640522875817, + "eval_QUANTITY_f1": 0.7323943661971831, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.787878787878788, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7428571428571429, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.14667053520679474, + "eval_overall_accuracy": 0.9593757740896706, + "eval_overall_f1": 0.824468085106383, + "eval_overall_precision": 0.7635467980295566, + "eval_overall_recall": 0.8959537572254336, + "eval_runtime": 0.3276, + "eval_samples_per_second": 570.779, + "eval_steps_per_second": 9.157, + "step": 4452 + }, + { + "epoch": 43.0, + "grad_norm": 0.6787899732589722, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.0584, + "step": 4558 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.8133333333333332, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7530864197530864, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7092198581560285, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8875000000000001, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8502994011976048, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7123287671232876, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6341463414634146, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.787878787878788, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7428571428571429, + "eval_TIME_recall": 0.8387096774193549, + "eval_loss": 0.14089816808700562, + "eval_overall_accuracy": 0.9591280653950953, + "eval_overall_f1": 0.8133333333333334, + "eval_overall_precision": 0.754950495049505, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3317, + "eval_samples_per_second": 563.793, + "eval_steps_per_second": 9.045, + "step": 4558 + }, + { + "epoch": 44.0, + "grad_norm": 1.8825234174728394, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0571, + "step": 4664 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7361111111111112, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6385542168674698, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8832807570977917, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8536585365853658, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7761194029850748, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7428571428571429, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1371491551399231, + "eval_overall_accuracy": 0.9621005697299975, + "eval_overall_f1": 0.8209959623149394, + "eval_overall_precision": 0.7682619647355163, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3331, + "eval_samples_per_second": 561.467, + "eval_steps_per_second": 9.007, + "step": 4664 + }, + { + "epoch": 45.0, + "grad_norm": 1.0761770009994507, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0568, + "step": 4770 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.8163265306122449, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7692307692307693, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.75177304964539, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6625, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.879746835443038, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.852760736196319, + "eval_PERSON_recall": 0.9084967320261438, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8709677419354839, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8709677419354839, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1322641521692276, + "eval_overall_accuracy": 0.9633391132028735, + "eval_overall_f1": 0.8310626702997275, + "eval_overall_precision": 0.7860824742268041, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.056, + "eval_steps_per_second": 9.017, + "step": 4770 + }, + { + "epoch": 46.0, + "grad_norm": 0.5931002497673035, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.0518, + "step": 4876 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7361111111111112, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6385542168674698, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8924050632911392, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8650306748466258, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7142857142857143, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14024412631988525, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8165997322623829, + "eval_overall_precision": 0.7605985037406484, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3334, + "eval_samples_per_second": 560.842, + "eval_steps_per_second": 8.997, + "step": 4876 + }, + { + "epoch": 47.0, + "grad_norm": 0.7838124632835388, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0529, + "step": 4982 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.7919463087248322, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7375, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7310344827586208, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6309523809523809, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8832807570977917, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8536585365853658, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7246376811594203, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.78125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1365956962108612, + "eval_overall_accuracy": 0.9621005697299975, + "eval_overall_f1": 0.8172043010752688, + "eval_overall_precision": 0.7638190954773869, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.284, + "eval_steps_per_second": 9.021, + "step": 4982 + }, + { + "epoch": 48.0, + "grad_norm": 1.4798297882080078, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.0516, + "step": 5088 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.8133333333333332, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7530864197530864, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7153284671532848, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6447368421052632, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.880503144654088, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8484848484848485, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7222222222222223, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.65, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14157778024673462, + "eval_overall_accuracy": 0.9598711914788209, + "eval_overall_f1": 0.8167115902964959, + "eval_overall_precision": 0.7651515151515151, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.3333, + "eval_samples_per_second": 561.042, + "eval_steps_per_second": 9.001, + "step": 5088 + }, + { + "epoch": 49.0, + "grad_norm": 0.9078386425971985, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.0524, + "step": 5194 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.8133333333333332, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7530864197530864, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7285714285714285, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6455696202531646, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8930817610062893, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8606060606060606, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.742857142857143, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6842105263157895, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14155249297618866, + "eval_overall_accuracy": 0.9616051523408472, + "eval_overall_f1": 0.8286099865047234, + "eval_overall_precision": 0.7772151898734178, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3319, + "eval_samples_per_second": 563.426, + "eval_steps_per_second": 9.039, + "step": 5194 + }, + { + "epoch": 50.0, + "grad_norm": 1.6509796380996704, + "learning_rate": 2.5e-05, + "loss": 0.0494, + "step": 5300 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.8108108108108109, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.759493670886076, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7445255474452555, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6710526315789473, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8867924528301887, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8545454545454545, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7323943661971831, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1381203830242157, + "eval_overall_accuracy": 0.9621005697299975, + "eval_overall_f1": 0.8254397834912043, + "eval_overall_precision": 0.7760814249363868, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3345, + "eval_samples_per_second": 559.02, + "eval_steps_per_second": 8.968, + "step": 5300 + }, + { + "epoch": 51.0, + "grad_norm": 0.7626707553863525, + "learning_rate": 2.45e-05, + "loss": 0.0478, + "step": 5406 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.8243243243243245, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7721518987341772, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7297297297297297, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6206896551724138, + "eval_ORGANIZATION_recall": 0.8852459016393442, + "eval_PERSON_f1": 0.8867924528301887, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8545454545454545, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7941176470588235, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.75, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14348609745502472, + "eval_overall_accuracy": 0.9623482784245727, + "eval_overall_f1": 0.8310991957104559, + "eval_overall_precision": 0.775, + "eval_overall_recall": 0.8959537572254336, + "eval_runtime": 0.3315, + "eval_samples_per_second": 564.037, + "eval_steps_per_second": 9.049, + "step": 5406 + }, + { + "epoch": 52.0, + "grad_norm": 0.5748308300971985, + "learning_rate": 2.4e-05, + "loss": 0.0485, + "step": 5512 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.8356164383561644, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7922077922077922, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7432432432432433, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.632183908045977, + "eval_ORGANIZATION_recall": 0.9016393442622951, + "eval_PERSON_f1": 0.8867924528301887, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8545454545454545, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7826086956521738, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7297297297297297, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14024515450000763, + "eval_overall_accuracy": 0.9625959871191478, + "eval_overall_f1": 0.836021505376344, + "eval_overall_precision": 0.7814070351758794, + "eval_overall_recall": 0.8988439306358381, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.57, + "eval_steps_per_second": 9.025, + "step": 5512 + }, + { + "epoch": 53.0, + "grad_norm": 1.001908779144287, + "learning_rate": 2.35e-05, + "loss": 0.0475, + "step": 5618 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.8356164383561644, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7922077922077922, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.726027397260274, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6235294117647059, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7761194029850748, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7428571428571429, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.13823822140693665, + "eval_overall_accuracy": 0.9630914045082982, + "eval_overall_f1": 0.8313090418353577, + "eval_overall_precision": 0.779746835443038, + "eval_overall_recall": 0.8901734104046243, + "eval_runtime": 0.3335, + "eval_samples_per_second": 560.779, + "eval_steps_per_second": 8.996, + "step": 5618 + }, + { + "epoch": 54.0, + "grad_norm": 0.5605162382125854, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0481, + "step": 5724 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.7919463087248322, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7375, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7285714285714285, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6455696202531646, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8867924528301887, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8545454545454545, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.75, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14209988713264465, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8209959623149394, + "eval_overall_precision": 0.7682619647355163, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3329, + "eval_samples_per_second": 561.696, + "eval_steps_per_second": 9.011, + "step": 5724 + }, + { + "epoch": 55.0, + "grad_norm": 1.0782426595687866, + "learning_rate": 2.25e-05, + "loss": 0.0474, + "step": 5830 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.8299319727891156, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.782051282051282, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7432432432432433, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.632183908045977, + "eval_ORGANIZATION_recall": 0.9016393442622951, + "eval_PERSON_f1": 0.8909657320872275, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8511904761904762, + "eval_PERSON_recall": 0.934640522875817, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14407247304916382, + "eval_overall_accuracy": 0.9618528610354223, + "eval_overall_f1": 0.8346666666666667, + "eval_overall_precision": 0.7747524752475248, + "eval_overall_recall": 0.9046242774566474, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.974, + "eval_steps_per_second": 9.016, + "step": 5830 + }, + { + "epoch": 56.0, + "grad_norm": 2.055668592453003, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0456, + "step": 5936 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7153284671532848, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6447368421052632, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.8930817610062893, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8606060606060606, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.75, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14129342138767242, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.8183041722745625, + "eval_overall_precision": 0.7657430730478589, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.981, + "eval_steps_per_second": 9.032, + "step": 5936 + }, + { + "epoch": 57.0, + "grad_norm": 0.7829787135124207, + "learning_rate": 2.15e-05, + "loss": 0.0451, + "step": 6042 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7428571428571428, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6582278481012658, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.89375, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8562874251497006, + "eval_PERSON_recall": 0.934640522875817, + "eval_QUANTITY_f1": 0.7605633802816902, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14520226418972015, + "eval_overall_accuracy": 0.9625959871191478, + "eval_overall_f1": 0.8268456375838926, + "eval_overall_precision": 0.7719298245614035, + "eval_overall_recall": 0.8901734104046243, + "eval_runtime": 0.3271, + "eval_samples_per_second": 571.771, + "eval_steps_per_second": 9.173, + "step": 6042 + }, + { + "epoch": 58.0, + "grad_norm": 0.2567724287509918, + "learning_rate": 2.1e-05, + "loss": 0.0437, + "step": 6148 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7222222222222222, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6265060240963856, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.8958990536277602, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8658536585365854, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7605633802816902, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14597974717617035, + "eval_overall_accuracy": 0.9616051523408472, + "eval_overall_f1": 0.8219544846050869, + "eval_overall_precision": 0.7655860349127181, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3323, + "eval_samples_per_second": 562.69, + "eval_steps_per_second": 9.027, + "step": 6148 + }, + { + "epoch": 59.0, + "grad_norm": 0.4437527656555176, + "learning_rate": 2.05e-05, + "loss": 0.0424, + "step": 6254 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7272727272727272, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6341463414634146, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.8875000000000001, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8502994011976048, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7605633802816902, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8181818181818182, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7714285714285715, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1500689834356308, + "eval_overall_accuracy": 0.9603666088679713, + "eval_overall_f1": 0.8186666666666668, + "eval_overall_precision": 0.7599009900990099, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.659, + "eval_steps_per_second": 9.027, + "step": 6254 + }, + { + "epoch": 60.0, + "grad_norm": 0.7316974997520447, + "learning_rate": 2e-05, + "loss": 0.0423, + "step": 6360 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.7999999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7407407407407407, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7210884353741497, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6162790697674418, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8847352024922119, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8452380952380952, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7323943661971831, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15397267043590546, + "eval_overall_accuracy": 0.9603666088679713, + "eval_overall_f1": 0.8169761273209548, + "eval_overall_precision": 0.7549019607843137, + "eval_overall_recall": 0.8901734104046243, + "eval_runtime": 0.3265, + "eval_samples_per_second": 572.825, + "eval_steps_per_second": 9.19, + "step": 6360 + }, + { + "epoch": 61.0, + "grad_norm": 0.7264176607131958, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.041, + "step": 6466 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.8053691275167786, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.726027397260274, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6235294117647059, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.742857142857143, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6842105263157895, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.147948756814003, + "eval_overall_accuracy": 0.9611097349516968, + "eval_overall_f1": 0.8208556149732621, + "eval_overall_precision": 0.763681592039801, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3335, + "eval_samples_per_second": 560.648, + "eval_steps_per_second": 8.994, + "step": 6466 + }, + { + "epoch": 62.0, + "grad_norm": 0.7140088677406311, + "learning_rate": 1.9e-05, + "loss": 0.0405, + "step": 6572 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7142857142857143, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8847352024922119, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8452380952380952, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.148734450340271, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8150134048257373, + "eval_overall_precision": 0.76, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.598, + "eval_steps_per_second": 9.01, + "step": 6572 + }, + { + "epoch": 63.0, + "grad_norm": 1.8145302534103394, + "learning_rate": 1.85e-05, + "loss": 0.0394, + "step": 6678 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.726027397260274, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6235294117647059, + "eval_ORGANIZATION_recall": 0.8688524590163934, + "eval_PERSON_f1": 0.8902821316614421, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8554216867469879, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7761194029850748, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7428571428571429, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14801925420761108, + "eval_overall_accuracy": 0.9625959871191478, + "eval_overall_f1": 0.8241610738255033, + "eval_overall_precision": 0.7694235588972431, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.37, + "eval_steps_per_second": 9.022, + "step": 6678 + }, + { + "epoch": 64.0, + "grad_norm": 1.2566087245941162, + "learning_rate": 1.8e-05, + "loss": 0.0384, + "step": 6784 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.627906976744186, + "eval_ORGANIZATION_recall": 0.8852459016393442, + "eval_PERSON_f1": 0.89375, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8562874251497006, + "eval_PERSON_recall": 0.934640522875817, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15676021575927734, + "eval_overall_accuracy": 0.9608620262571216, + "eval_overall_f1": 0.824468085106383, + "eval_overall_precision": 0.7635467980295566, + "eval_overall_recall": 0.8959537572254336, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.621, + "eval_steps_per_second": 9.01, + "step": 6784 + }, + { + "epoch": 65.0, + "grad_norm": 1.431575894355774, + "learning_rate": 1.75e-05, + "loss": 0.0397, + "step": 6890 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.8187919463087248, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7625, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6993006993006993, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6097560975609756, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8785046728971961, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8392857142857143, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7605633802816902, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.14874185621738434, + "eval_overall_accuracy": 0.9603666088679713, + "eval_overall_f1": 0.8170894526034712, + "eval_overall_precision": 0.7593052109181141, + "eval_overall_recall": 0.884393063583815, + "eval_runtime": 0.3264, + "eval_samples_per_second": 572.977, + "eval_steps_per_second": 9.192, + "step": 6890 + }, + { + "epoch": 66.0, + "grad_norm": 1.1114250421524048, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0388, + "step": 6996 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7074829931972788, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6046511627906976, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.8916408668730651, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8470588235294118, + "eval_PERSON_recall": 0.9411764705882353, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1624731570482254, + "eval_overall_accuracy": 0.9601189001733961, + "eval_overall_f1": 0.8153034300791557, + "eval_overall_precision": 0.75, + "eval_overall_recall": 0.8930635838150289, + "eval_runtime": 0.3323, + "eval_samples_per_second": 562.754, + "eval_steps_per_second": 9.028, + "step": 6996 + }, + { + "epoch": 67.0, + "grad_norm": 2.434706926345825, + "learning_rate": 1.65e-05, + "loss": 0.0402, + "step": 7102 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.8026315789473685, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7123287671232876, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.611764705882353, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.8909657320872275, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8511904761904762, + "eval_PERSON_recall": 0.934640522875817, + "eval_QUANTITY_f1": 0.75, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1620258390903473, + "eval_overall_accuracy": 0.9596234827842457, + "eval_overall_f1": 0.8201058201058201, + "eval_overall_precision": 0.7560975609756098, + "eval_overall_recall": 0.8959537572254336, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.349, + "eval_steps_per_second": 9.022, + "step": 7102 + }, + { + "epoch": 68.0, + "grad_norm": 0.43614867329597473, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0397, + "step": 7208 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7183098591549297, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6296296296296297, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1530744731426239, + "eval_overall_accuracy": 0.9603666088679713, + "eval_overall_f1": 0.8155080213903744, + "eval_overall_precision": 0.7587064676616916, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.328, + "eval_samples_per_second": 570.186, + "eval_steps_per_second": 9.147, + "step": 7208 + }, + { + "epoch": 69.0, + "grad_norm": 1.2761048078536987, + "learning_rate": 1.55e-05, + "loss": 0.0383, + "step": 7314 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.8108108108108109, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.759493670886076, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7034482758620689, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6071428571428571, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8847352024922119, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8452380952380952, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7826086956521738, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7297297297297297, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15172313153743744, + "eval_overall_accuracy": 0.9618528610354223, + "eval_overall_f1": 0.8219544846050869, + "eval_overall_precision": 0.7655860349127181, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3319, + "eval_samples_per_second": 563.458, + "eval_steps_per_second": 9.039, + "step": 7314 + }, + { + "epoch": 70.0, + "grad_norm": 1.8664883375167847, + "learning_rate": 1.5e-05, + "loss": 0.0375, + "step": 7420 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7172413793103448, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6190476190476191, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.8875000000000001, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8502994011976048, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.7826086956521738, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7297297297297297, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1547580361366272, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.8208556149732621, + "eval_overall_precision": 0.763681592039801, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.3256, + "eval_samples_per_second": 574.274, + "eval_steps_per_second": 9.213, + "step": 7420 + }, + { + "epoch": 71.0, + "grad_norm": 2.3128960132598877, + "learning_rate": 1.45e-05, + "loss": 0.0368, + "step": 7526 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.689655172413793, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.5952380952380952, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7826086956521738, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7297297297297297, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15528355538845062, + "eval_overall_accuracy": 0.9598711914788209, + "eval_overall_f1": 0.8117489986648866, + "eval_overall_precision": 0.7543424317617866, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.889, + "eval_steps_per_second": 9.03, + "step": 7526 + }, + { + "epoch": 72.0, + "grad_norm": 2.342613458633423, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0375, + "step": 7632 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7142857142857143, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7826086956521738, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7297297297297297, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15482325851917267, + "eval_overall_accuracy": 0.9608620262571216, + "eval_overall_f1": 0.8150134048257373, + "eval_overall_precision": 0.76, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.879, + "eval_steps_per_second": 9.03, + "step": 7632 + }, + { + "epoch": 73.0, + "grad_norm": 2.5355284214019775, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0364, + "step": 7738 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.7733333333333333, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7160493827160493, + "eval_LOCATION_recall": 0.8405797101449275, + "eval_ORGANIZATION_f1": 0.6950354609929078, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6125, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.84375, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15112702548503876, + "eval_overall_accuracy": 0.9616051523408472, + "eval_overall_f1": 0.811320754716981, + "eval_overall_precision": 0.76010101010101, + "eval_overall_recall": 0.869942196531792, + "eval_runtime": 0.3265, + "eval_samples_per_second": 572.81, + "eval_steps_per_second": 9.189, + "step": 7738 + }, + { + "epoch": 74.0, + "grad_norm": 2.763317108154297, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0356, + "step": 7844 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6944444444444445, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6024096385542169, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7941176470588235, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.75, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15702202916145325, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8128342245989304, + "eval_overall_precision": 0.7562189054726368, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.361, + "eval_steps_per_second": 9.022, + "step": 7844 + }, + { + "epoch": 75.0, + "grad_norm": 0.9147346615791321, + "learning_rate": 1.25e-05, + "loss": 0.0345, + "step": 7950 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.8053691275167786, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7132867132867133, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6219512195121951, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8875000000000001, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8502994011976048, + "eval_PERSON_recall": 0.9281045751633987, + "eval_QUANTITY_f1": 0.742857142857143, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6842105263157895, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.160079225897789, + "eval_overall_accuracy": 0.9608620262571216, + "eval_overall_f1": 0.8192771084337348, + "eval_overall_precision": 0.7630922693266833, + "eval_overall_recall": 0.884393063583815, + "eval_runtime": 0.3259, + "eval_samples_per_second": 573.758, + "eval_steps_per_second": 9.205, + "step": 7950 + }, + { + "epoch": 76.0, + "grad_norm": 0.9755911827087402, + "learning_rate": 1.2e-05, + "loss": 0.0358, + "step": 8056 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7172413793103448, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6190476190476191, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7941176470588235, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.75, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15988942980766296, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.816, + "eval_overall_precision": 0.7574257425742574, + "eval_overall_recall": 0.884393063583815, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.905, + "eval_steps_per_second": 9.031, + "step": 8056 + }, + { + "epoch": 77.0, + "grad_norm": 0.6485092639923096, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0359, + "step": 8162 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.8133333333333332, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7530864197530864, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7092198581560285, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7826086956521738, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7297297297297297, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15627126395702362, + "eval_overall_accuracy": 0.9616051523408472, + "eval_overall_f1": 0.8236877523553163, + "eval_overall_precision": 0.7707808564231738, + "eval_overall_recall": 0.884393063583815, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.64, + "eval_steps_per_second": 9.01, + "step": 8162 + }, + { + "epoch": 78.0, + "grad_norm": 0.8235780596733093, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0328, + "step": 8268 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.8187919463087248, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7625, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7050359712230215, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6282051282051282, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7536231884057971, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7027027027027027, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15545301139354706, + "eval_overall_accuracy": 0.9616051523408472, + "eval_overall_f1": 0.8216216216216216, + "eval_overall_precision": 0.7715736040609137, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3276, + "eval_samples_per_second": 570.819, + "eval_steps_per_second": 9.158, + "step": 8268 + }, + { + "epoch": 79.0, + "grad_norm": 1.297727108001709, + "learning_rate": 1.05e-05, + "loss": 0.034, + "step": 8374 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7183098591549297, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6296296296296297, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1584213823080063, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.8187919463087249, + "eval_overall_precision": 0.7644110275689223, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3318, + "eval_samples_per_second": 563.58, + "eval_steps_per_second": 9.041, + "step": 8374 + }, + { + "epoch": 80.0, + "grad_norm": 1.0464829206466675, + "learning_rate": 1e-05, + "loss": 0.0344, + "step": 8480 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.8108108108108109, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.759493670886076, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7142857142857143, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15635241568088531, + "eval_overall_accuracy": 0.9621005697299975, + "eval_overall_f1": 0.8216216216216216, + "eval_overall_precision": 0.7715736040609137, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3334, + "eval_samples_per_second": 560.931, + "eval_steps_per_second": 8.999, + "step": 8480 + }, + { + "epoch": 81.0, + "grad_norm": 1.1984474658966064, + "learning_rate": 9.5e-06, + "loss": 0.0337, + "step": 8586 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.8133333333333332, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7530864197530864, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6950354609929078, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6125, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.742857142857143, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6842105263157895, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16573630273342133, + "eval_overall_accuracy": 0.9601189001733961, + "eval_overall_f1": 0.8150134048257373, + "eval_overall_precision": 0.76, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3335, + "eval_samples_per_second": 560.721, + "eval_steps_per_second": 8.996, + "step": 8586 + }, + { + "epoch": 82.0, + "grad_norm": 1.3891063928604126, + "learning_rate": 9e-06, + "loss": 0.0331, + "step": 8692 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.8187919463087248, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7625, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6986301369863014, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.88125, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.844311377245509, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16680771112442017, + "eval_overall_accuracy": 0.9601189001733961, + "eval_overall_f1": 0.8186666666666668, + "eval_overall_precision": 0.7599009900990099, + "eval_overall_recall": 0.8872832369942196, + "eval_runtime": 0.326, + "eval_samples_per_second": 573.602, + "eval_steps_per_second": 9.202, + "step": 8692 + }, + { + "epoch": 83.0, + "grad_norm": 1.0325218439102173, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0328, + "step": 8798 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7142857142857143, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.880503144654088, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8484848484848485, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15955756604671478, + "eval_overall_accuracy": 0.9611097349516968, + "eval_overall_f1": 0.8167115902964959, + "eval_overall_precision": 0.7651515151515151, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.469, + "eval_steps_per_second": 9.024, + "step": 8798 + }, + { + "epoch": 84.0, + "grad_norm": 1.833433747291565, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0339, + "step": 8904 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.723404255319149, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6375, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7536231884057971, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7027027027027027, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15904375910758972, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.8183041722745625, + "eval_overall_precision": 0.7657430730478589, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.206, + "eval_steps_per_second": 9.019, + "step": 8904 + }, + { + "epoch": 85.0, + "grad_norm": 0.8579007983207703, + "learning_rate": 7.5e-06, + "loss": 0.0324, + "step": 9010 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6906474820143885, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6153846153846154, + "eval_ORGANIZATION_recall": 0.7868852459016393, + "eval_PERSON_f1": 0.8742138364779874, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9084967320261438, + "eval_QUANTITY_f1": 0.7647058823529411, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.15392935276031494, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.8092016238159675, + "eval_overall_precision": 0.7608142493638677, + "eval_overall_recall": 0.8641618497109826, + "eval_runtime": 0.3321, + "eval_samples_per_second": 563.068, + "eval_steps_per_second": 9.033, + "step": 9010 + }, + { + "epoch": 86.0, + "grad_norm": 2.3941845893859863, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0327, + "step": 9116 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6762589928057554, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6025641025641025, + "eval_ORGANIZATION_recall": 0.7704918032786885, + "eval_PERSON_f1": 0.875, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8383233532934131, + "eval_PERSON_recall": 0.9150326797385621, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1588955819606781, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8053691275167784, + "eval_overall_precision": 0.7518796992481203, + "eval_overall_recall": 0.8670520231213873, + "eval_runtime": 0.3263, + "eval_samples_per_second": 573.03, + "eval_steps_per_second": 9.193, + "step": 9116 + }, + { + "epoch": 87.0, + "grad_norm": 1.8358588218688965, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0318, + "step": 9222 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.723404255319149, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6375, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16152726113796234, + "eval_overall_accuracy": 0.9611097349516968, + "eval_overall_f1": 0.8198924731182795, + "eval_overall_precision": 0.7663316582914573, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3317, + "eval_samples_per_second": 563.682, + "eval_steps_per_second": 9.043, + "step": 9222 + }, + { + "epoch": 88.0, + "grad_norm": 2.0525128841400146, + "learning_rate": 6e-06, + "loss": 0.0311, + "step": 9328 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6906474820143885, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6153846153846154, + "eval_ORGANIZATION_recall": 0.7868852459016393, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16066040098667145, + "eval_overall_accuracy": 0.9608620262571216, + "eval_overall_f1": 0.8129205921938087, + "eval_overall_precision": 0.760705289672544, + "eval_overall_recall": 0.8728323699421965, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.545, + "eval_steps_per_second": 9.025, + "step": 9328 + }, + { + "epoch": 89.0, + "grad_norm": 0.892722487449646, + "learning_rate": 5.500000000000001e-06, + "loss": 0.033, + "step": 9434 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7108433734939759, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6956521739130435, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6233766233766234, + "eval_ORGANIZATION_recall": 0.7868852459016393, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16328519582748413, + "eval_overall_accuracy": 0.9601189001733961, + "eval_overall_f1": 0.8118279569892471, + "eval_overall_precision": 0.7587939698492462, + "eval_overall_recall": 0.8728323699421965, + "eval_runtime": 0.3318, + "eval_samples_per_second": 563.6, + "eval_steps_per_second": 9.042, + "step": 9434 + }, + { + "epoch": 90.0, + "grad_norm": 1.3466918468475342, + "learning_rate": 5e-06, + "loss": 0.0314, + "step": 9540 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.7999999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7407407407407407, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.620253164556962, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1654297560453415, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8183041722745625, + "eval_overall_precision": 0.7657430730478589, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3339, + "eval_samples_per_second": 560.083, + "eval_steps_per_second": 8.985, + "step": 9540 + }, + { + "epoch": 91.0, + "grad_norm": 0.5831097960472107, + "learning_rate": 4.5e-06, + "loss": 0.0317, + "step": 9646 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.620253164556962, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16038751602172852, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.8145161290322581, + "eval_overall_precision": 0.7613065326633166, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.3258, + "eval_samples_per_second": 573.92, + "eval_steps_per_second": 9.207, + "step": 9646 + }, + { + "epoch": 92.0, + "grad_norm": 2.8795530796051025, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0313, + "step": 9752 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7142857142857143, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.742857142857143, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.6842105263157895, + "eval_QUANTITY_recall": 0.8125, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16018863022327423, + "eval_overall_accuracy": 0.961357443646272, + "eval_overall_f1": 0.8156123822341858, + "eval_overall_precision": 0.7632241813602015, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.323, + "eval_steps_per_second": 9.021, + "step": 9752 + }, + { + "epoch": 93.0, + "grad_norm": 0.9272975325584412, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0317, + "step": 9858 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7142857142857143, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16194607317447662, + "eval_overall_accuracy": 0.9611097349516968, + "eval_overall_f1": 0.8183041722745625, + "eval_overall_precision": 0.7657430730478589, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3339, + "eval_samples_per_second": 560.094, + "eval_steps_per_second": 8.985, + "step": 9858 + }, + { + "epoch": 94.0, + "grad_norm": 1.6155447959899902, + "learning_rate": 3e-06, + "loss": 0.0322, + "step": 9964 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7272727272727272, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6341463414634146, + "eval_ORGANIZATION_recall": 0.8524590163934426, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16590212285518646, + "eval_overall_accuracy": 0.9608620262571216, + "eval_overall_f1": 0.8203753351206433, + "eval_overall_precision": 0.765, + "eval_overall_recall": 0.884393063583815, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.263, + "eval_steps_per_second": 9.036, + "step": 9964 + }, + { + "epoch": 95.0, + "grad_norm": 0.8714067339897156, + "learning_rate": 2.5e-06, + "loss": 0.0305, + "step": 10070 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6993006993006993, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6097560975609756, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16414810717105865, + "eval_overall_accuracy": 0.9603666088679713, + "eval_overall_f1": 0.8128342245989304, + "eval_overall_precision": 0.7562189054726368, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3262, + "eval_samples_per_second": 573.227, + "eval_steps_per_second": 9.196, + "step": 10070 + }, + { + "epoch": 96.0, + "grad_norm": 0.7234588265419006, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0302, + "step": 10176 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7050359712230215, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6282051282051282, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1619059145450592, + "eval_overall_accuracy": 0.9608620262571216, + "eval_overall_f1": 0.8167115902964959, + "eval_overall_precision": 0.7651515151515151, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.3323, + "eval_samples_per_second": 562.669, + "eval_steps_per_second": 9.027, + "step": 10176 + }, + { + "epoch": 97.0, + "grad_norm": 1.2419594526290894, + "learning_rate": 1.5e-06, + "loss": 0.0309, + "step": 10282 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.7866666666666667, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7283950617283951, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.7183098591549297, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6296296296296297, + "eval_ORGANIZATION_recall": 0.8360655737704918, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1639154851436615, + "eval_overall_accuracy": 0.9608620262571216, + "eval_overall_f1": 0.8187919463087249, + "eval_overall_precision": 0.7644110275689223, + "eval_overall_recall": 0.8815028901734104, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.252, + "eval_steps_per_second": 9.02, + "step": 10282 + }, + { + "epoch": 98.0, + "grad_norm": 0.3145425021648407, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0288, + "step": 10388 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6993006993006993, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6097560975609756, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.84375, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.8181818181818182, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.16421489417552948, + "eval_overall_accuracy": 0.9606143175625464, + "eval_overall_f1": 0.8139223560910309, + "eval_overall_precision": 0.7581047381546134, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.38, + "eval_steps_per_second": 9.022, + "step": 10388 + }, + { + "epoch": 99.0, + "grad_norm": 0.6178627610206604, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0307, + "step": 10494 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6901408450704226, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6049382716049383, + "eval_ORGANIZATION_recall": 0.8032786885245902, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1645815670490265, + "eval_overall_accuracy": 0.9603666088679713, + "eval_overall_f1": 0.8112449799196787, + "eval_overall_precision": 0.7556109725685786, + "eval_overall_recall": 0.8757225433526011, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.213, + "eval_steps_per_second": 9.036, + "step": 10494 + }, + { + "epoch": 100.0, + "grad_norm": 1.0940686464309692, + "learning_rate": 0.0, + "loss": 0.032, + "step": 10600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.7814569536423841, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7195121951219512, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6993006993006993, + "eval_ORGANIZATION_number": 61, + "eval_ORGANIZATION_precision": 0.6097560975609756, + "eval_ORGANIZATION_recall": 0.819672131147541, + "eval_PERSON_f1": 0.8840125391849529, + "eval_PERSON_number": 153, + "eval_PERSON_precision": 0.8493975903614458, + "eval_PERSON_recall": 0.9215686274509803, + "eval_QUANTITY_f1": 0.7714285714285714, + "eval_QUANTITY_number": 32, + "eval_QUANTITY_precision": 0.7105263157894737, + "eval_QUANTITY_recall": 0.84375, + "eval_TIME_f1": 0.8307692307692308, + "eval_TIME_number": 31, + "eval_TIME_precision": 0.7941176470588235, + "eval_TIME_recall": 0.8709677419354839, + "eval_loss": 0.1641756296157837, + "eval_overall_accuracy": 0.9603666088679713, + "eval_overall_f1": 0.8128342245989304, + "eval_overall_precision": 0.7562189054726368, + "eval_overall_recall": 0.8786127167630058, + "eval_runtime": 0.3326, + "eval_samples_per_second": 562.318, + "eval_steps_per_second": 9.021, + "step": 10600 + }, + { + "epoch": 100.0, + "step": 10600, + "total_flos": 4608467078884878.0, + "train_loss": 0.08280598743906561, + "train_runtime": 623.2047, + "train_samples_per_second": 270.698, + "train_steps_per_second": 17.009 + } + ], + "logging_steps": 500, + "max_steps": 10600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 4608467078884878.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}