diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,3129 +1,184 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 100.0, + "epoch": 5.0, "eval_steps": 500, - "global_step": 9600, + "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, - "grad_norm": 4.475216865539551, - "learning_rate": 4.9500000000000004e-05, - "loss": 0.2611, + "grad_norm": 2.2837767601013184, + "learning_rate": 4e-05, + "loss": 0.2529, "step": 96 }, { "epoch": 1.0, - "eval_LOCATION_f1": 0.9109947643979057, + "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8969072164948454, - "eval_LOCATION_recall": 0.925531914893617, - "eval_ORGANIZATION_f1": 0.8967551622418879, + "eval_LOCATION_precision": 0.9438202247191011, + "eval_LOCATION_recall": 0.8936170212765957, + "eval_ORGANIZATION_f1": 0.887608069164265, "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.8837209302325582, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9854014598540146, + "eval_ORGANIZATION_precision": 0.8555555555555555, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.966789667896679, "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9854014598540146, - "eval_PERSON_recall": 0.9854014598540146, - "eval_loss": 0.046344444155693054, - "eval_overall_accuracy": 0.9864640883977901, - "eval_overall_f1": 0.9303482587064676, - "eval_overall_precision": 0.9211822660098522, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.5089, - "eval_samples_per_second": 334.076, - "eval_steps_per_second": 5.895, + "eval_PERSON_precision": 0.9776119402985075, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.04780818894505501, + "eval_overall_accuracy": 0.9850828729281768, + "eval_overall_f1": 0.9213483146067416, + "eval_overall_precision": 0.9156327543424317, + "eval_overall_recall": 0.9271356783919598, + "eval_runtime": 1.239, + "eval_samples_per_second": 137.204, + "eval_steps_per_second": 2.421, "step": 96 }, { "epoch": 2.0, - "grad_norm": 5.619458198547363, - "learning_rate": 4.9e-05, - "loss": 0.0645, + "grad_norm": 1.9025866985321045, + "learning_rate": 3e-05, + "loss": 0.0617, "step": 192 }, { "epoch": 2.0, - "eval_LOCATION_f1": 0.8490566037735849, + "eval_LOCATION_f1": 0.8969072164948454, "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.7627118644067796, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.8597560975609756, + "eval_LOCATION_precision": 0.87, + "eval_LOCATION_recall": 0.925531914893617, + "eval_ORGANIZATION_f1": 0.9005847953216375, "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.8757763975155279, - "eval_ORGANIZATION_recall": 0.844311377245509, - "eval_PERSON_f1": 0.981549815498155, + "eval_ORGANIZATION_precision": 0.88, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9675090252707581, "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925373134328358, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.0677841454744339, - "eval_overall_accuracy": 0.9779005524861878, - "eval_overall_f1": 0.8976572133168926, - "eval_overall_precision": 0.8813559322033898, - "eval_overall_recall": 0.914572864321608, - "eval_runtime": 0.5311, - "eval_samples_per_second": 320.081, - "eval_steps_per_second": 5.648, + "eval_PERSON_precision": 0.9571428571428572, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.05445471405982971, + "eval_overall_accuracy": 0.9814917127071823, + "eval_overall_f1": 0.9225092250922509, + "eval_overall_precision": 0.9036144578313253, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 1.2045, + "eval_samples_per_second": 141.139, + "eval_steps_per_second": 2.491, "step": 192 }, { "epoch": 3.0, - "grad_norm": 0.5541238784790039, - "learning_rate": 4.85e-05, - "loss": 0.0354, + "grad_norm": 1.029209852218628, + "learning_rate": 2e-05, + "loss": 0.0309, "step": 288 }, { "epoch": 3.0, - "eval_LOCATION_f1": 0.9278350515463918, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.8988095238095238, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.893491124260355, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9818181818181817, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9782608695652174, - "eval_PERSON_recall": 0.9854014598540146, - "eval_loss": 0.04672340676188469, - "eval_overall_accuracy": 0.9861878453038674, - "eval_overall_f1": 0.9341614906832298, - "eval_overall_precision": 0.9238329238329238, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.5296, - "eval_samples_per_second": 321.026, - "eval_steps_per_second": 5.665, - "step": 288 - }, - { - "epoch": 4.0, - "grad_norm": 2.0090787410736084, - "learning_rate": 4.8e-05, - "loss": 0.0232, - "step": 384 - }, - { - "epoch": 4.0, - "eval_LOCATION_f1": 0.9230769230769231, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8910891089108911, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9164086687306501, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9487179487179487, - "eval_ORGANIZATION_recall": 0.8862275449101796, - "eval_PERSON_f1": 0.9552238805970148, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9770992366412213, - "eval_PERSON_recall": 0.9343065693430657, - "eval_loss": 0.06353317946195602, - "eval_overall_accuracy": 0.9845303867403314, - "eval_overall_f1": 0.9312977099236641, - "eval_overall_precision": 0.9432989690721649, - "eval_overall_recall": 0.9195979899497487, - "eval_runtime": 0.5408, - "eval_samples_per_second": 314.335, - "eval_steps_per_second": 5.547, - "step": 384 - }, - { - "epoch": 5.0, - "grad_norm": 0.37130075693130493, - "learning_rate": 4.75e-05, - "loss": 0.0158, - "step": 480 - }, - { - "epoch": 5.0, - "eval_LOCATION_f1": 0.91, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8584905660377359, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9022082018927444, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9533333333333334, - "eval_ORGANIZATION_recall": 0.8562874251497006, - "eval_PERSON_f1": 0.9854014598540146, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9854014598540146, - "eval_PERSON_recall": 0.9854014598540146, - "eval_loss": 0.05297553166747093, - "eval_overall_accuracy": 0.9861878453038674, - "eval_overall_f1": 0.9329962073324906, - "eval_overall_precision": 0.9389312977099237, - "eval_overall_recall": 0.9271356783919598, - "eval_runtime": 0.5568, - "eval_samples_per_second": 305.315, - "eval_steps_per_second": 5.388, - "step": 480 - }, - { - "epoch": 6.0, - "grad_norm": 1.5923467874526978, - "learning_rate": 4.7e-05, - "loss": 0.011, - "step": 576 - }, - { - "epoch": 6.0, - "eval_LOCATION_f1": 0.9319371727748691, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.9468085106382979, - "eval_ORGANIZATION_f1": 0.9129129129129129, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9156626506024096, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9816849816849818, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9852941176470589, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.05083903297781944, - "eval_overall_accuracy": 0.9878453038674033, - "eval_overall_f1": 0.9410288582183186, - "eval_overall_precision": 0.9398496240601504, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.5598, - "eval_samples_per_second": 303.698, - "eval_steps_per_second": 5.359, - "step": 576 - }, - { - "epoch": 7.0, - "grad_norm": 2.4925856590270996, - "learning_rate": 4.6500000000000005e-05, - "loss": 0.0086, - "step": 672 - }, - { - "epoch": 7.0, - "eval_LOCATION_f1": 0.925531914893617, + "eval_LOCATION_f1": 0.883248730964467, "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.925531914893617, + "eval_LOCATION_precision": 0.8446601941747572, "eval_LOCATION_recall": 0.925531914893617, - "eval_ORGANIZATION_f1": 0.9176470588235294, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9017341040462428, - "eval_ORGANIZATION_recall": 0.9341317365269461, - "eval_PERSON_f1": 0.9816849816849818, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9852941176470589, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.0657382383942604, - "eval_overall_accuracy": 0.9864640883977901, - "eval_overall_f1": 0.9413233458177278, - "eval_overall_precision": 0.9354838709677419, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.5677, - "eval_samples_per_second": 299.462, - "eval_steps_per_second": 5.285, - "step": 672 - }, - { - "epoch": 8.0, - "grad_norm": 0.08103451132774353, - "learning_rate": 4.600000000000001e-05, - "loss": 0.007, - "step": 768 - }, - { - "epoch": 8.0, - "eval_LOCATION_f1": 0.900523560209424, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8865979381443299, - "eval_LOCATION_recall": 0.9148936170212766, - "eval_ORGANIZATION_f1": 0.8988095238095238, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.893491124260355, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9779411764705882, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9851851851851852, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.0754326730966568, - "eval_overall_accuracy": 0.9864640883977901, - "eval_overall_f1": 0.9261576971214018, - "eval_overall_precision": 0.9226932668329177, - "eval_overall_recall": 0.9296482412060302, - "eval_runtime": 0.5838, - "eval_samples_per_second": 291.187, - "eval_steps_per_second": 5.139, - "step": 768 - }, - { - "epoch": 9.0, - "grad_norm": 0.2643495202064514, - "learning_rate": 4.55e-05, - "loss": 0.0061, - "step": 864 - }, - { - "epoch": 9.0, - "eval_LOCATION_f1": 0.908108108108108, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9230769230769231, - "eval_LOCATION_recall": 0.8936170212765957, - "eval_ORGANIZATION_f1": 0.9080459770114944, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.8729281767955801, - "eval_ORGANIZATION_recall": 0.9461077844311377, - "eval_PERSON_f1": 0.988929889298893, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 1.0, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.07025933265686035, - "eval_overall_accuracy": 0.9878453038674033, - "eval_overall_f1": 0.9353233830845771, - "eval_overall_precision": 0.9261083743842364, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.5996, - "eval_samples_per_second": 283.509, - "eval_steps_per_second": 5.003, - "step": 864 - }, - { - "epoch": 10.0, - "grad_norm": 0.8626702427864075, - "learning_rate": 4.5e-05, - "loss": 0.0058, - "step": 960 - }, - { - "epoch": 10.0, - "eval_LOCATION_f1": 0.9032258064516129, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9130434782608695, - "eval_LOCATION_recall": 0.8936170212765957, - "eval_ORGANIZATION_f1": 0.9011627906976745, + "eval_ORGANIZATION_f1": 0.8650306748466258, "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.8757062146892656, - "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_ORGANIZATION_precision": 0.8867924528301887, + "eval_ORGANIZATION_recall": 0.844311377245509, "eval_PERSON_f1": 0.9708029197080292, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9708029197080292, "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.06500135362148285, - "eval_overall_accuracy": 0.9867403314917127, - "eval_overall_f1": 0.9253731343283583, - "eval_overall_precision": 0.916256157635468, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6116, - "eval_samples_per_second": 277.94, - "eval_steps_per_second": 4.905, - "step": 960 - }, - { - "epoch": 11.0, - "grad_norm": 0.020984740927815437, - "learning_rate": 4.4500000000000004e-05, - "loss": 0.0048, - "step": 1056 - }, - { - "epoch": 11.0, - "eval_LOCATION_f1": 0.9128205128205128, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8811881188118812, - "eval_LOCATION_recall": 0.9468085106382979, - "eval_ORGANIZATION_f1": 0.9112426035502958, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9005847953216374, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.08488745987415314, - "eval_overall_accuracy": 0.9859116022099448, - "eval_overall_f1": 0.9328358208955224, - "eval_overall_precision": 0.9236453201970444, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6184, - "eval_samples_per_second": 274.893, - "eval_steps_per_second": 4.851, - "step": 1056 + "eval_loss": 0.05393547564744949, + "eval_overall_accuracy": 0.9828729281767956, + "eval_overall_f1": 0.9058971141781681, + "eval_overall_precision": 0.9047619047619048, + "eval_overall_recall": 0.907035175879397, + "eval_runtime": 0.6088, + "eval_samples_per_second": 279.254, + "eval_steps_per_second": 4.928, + "step": 288 }, { - "epoch": 12.0, - "grad_norm": 0.3056125044822693, - "learning_rate": 4.4000000000000006e-05, - "loss": 0.0057, - "step": 1152 + "epoch": 4.0, + "grad_norm": 0.655804455280304, + "learning_rate": 1e-05, + "loss": 0.0178, + "step": 384 }, { - "epoch": 12.0, - "eval_LOCATION_f1": 0.9109947643979057, + "epoch": 4.0, + "eval_LOCATION_f1": 0.90625, "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8969072164948454, + "eval_LOCATION_precision": 0.8877551020408163, "eval_LOCATION_recall": 0.925531914893617, - "eval_ORGANIZATION_f1": 0.903225806451613, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.8850574712643678, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.988929889298893, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 1.0, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.072464220225811, - "eval_overall_accuracy": 0.9870165745856354, - "eval_overall_f1": 0.9339975093399752, - "eval_overall_precision": 0.9259259259259259, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.622, - "eval_samples_per_second": 273.318, - "eval_steps_per_second": 4.823, - "step": 1152 - }, - { - "epoch": 13.0, - "grad_norm": 0.08819713443517685, - "learning_rate": 4.35e-05, - "loss": 0.0052, - "step": 1248 - }, - { - "epoch": 13.0, - "eval_LOCATION_f1": 0.9130434782608695, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9333333333333333, - "eval_LOCATION_recall": 0.8936170212765957, - "eval_ORGANIZATION_f1": 0.8895522388059701, + "eval_ORGANIZATION_f1": 0.9020771513353116, "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.8869047619047619, - "eval_ORGANIZATION_recall": 0.8922155688622755, + "eval_ORGANIZATION_precision": 0.8941176470588236, + "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.08716335147619247, + "eval_loss": 0.05562783032655716, "eval_overall_accuracy": 0.9845303867403314, - "eval_overall_f1": 0.9267676767676767, - "eval_overall_precision": 0.9314720812182741, - "eval_overall_recall": 0.9221105527638191, - "eval_runtime": 0.6038, - "eval_samples_per_second": 281.559, - "eval_steps_per_second": 4.969, - "step": 1248 - }, - { - "epoch": 14.0, - "grad_norm": 0.0017481500981375575, - "learning_rate": 4.3e-05, - "loss": 0.002, - "step": 1344 - }, - { - "epoch": 14.0, - "eval_LOCATION_f1": 0.8972972972972972, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9120879120879121, - "eval_LOCATION_recall": 0.8829787234042553, - "eval_ORGANIZATION_f1": 0.9285714285714287, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9230769230769231, - "eval_ORGANIZATION_recall": 0.9341317365269461, - "eval_PERSON_f1": 0.9816849816849818, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9852941176470589, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.07973892986774445, - "eval_overall_accuracy": 0.9881215469613259, - "eval_overall_f1": 0.9395465994962218, - "eval_overall_precision": 0.9419191919191919, + "eval_overall_f1": 0.9301745635910225, + "eval_overall_precision": 0.9232673267326733, "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.6104, - "eval_samples_per_second": 278.491, - "eval_steps_per_second": 4.915, - "step": 1344 - }, - { - "epoch": 15.0, - "grad_norm": 5.35858154296875, - "learning_rate": 4.25e-05, - "loss": 0.0036, - "step": 1440 - }, - { - "epoch": 15.0, - "eval_LOCATION_f1": 0.9297297297297297, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.945054945054945, - "eval_LOCATION_recall": 0.9148936170212766, - "eval_ORGANIZATION_f1": 0.9235294117647059, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9075144508670521, - "eval_ORGANIZATION_recall": 0.9401197604790419, - "eval_PERSON_f1": 0.988929889298893, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 1.0, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.08796700835227966, - "eval_overall_accuracy": 0.987292817679558, - "eval_overall_f1": 0.9472361809045227, - "eval_overall_precision": 0.9472361809045227, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6289, - "eval_samples_per_second": 270.333, - "eval_steps_per_second": 4.771, - "step": 1440 - }, - { - "epoch": 16.0, - "grad_norm": 0.012317053973674774, - "learning_rate": 4.2e-05, - "loss": 0.0026, - "step": 1536 - }, - { - "epoch": 16.0, - "eval_LOCATION_f1": 0.8888888888888888, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8842105263157894, - "eval_LOCATION_recall": 0.8936170212765957, - "eval_ORGANIZATION_f1": 0.9305135951661632, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9390243902439024, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9779411764705882, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9851851851851852, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.08714718371629715, - "eval_overall_accuracy": 0.9870165745856354, - "eval_overall_f1": 0.9368686868686869, - "eval_overall_precision": 0.9416243654822335, - "eval_overall_recall": 0.9321608040201005, - "eval_runtime": 0.6145, - "eval_samples_per_second": 276.648, - "eval_steps_per_second": 4.882, - "step": 1536 - }, - { - "epoch": 17.0, - "grad_norm": 0.00884944200515747, - "learning_rate": 4.15e-05, - "loss": 0.0028, - "step": 1632 - }, - { - "epoch": 17.0, - "eval_LOCATION_f1": 0.9175257731958764, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.89, - "eval_LOCATION_recall": 0.9468085106382979, - "eval_ORGANIZATION_f1": 0.924924924924925, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.927710843373494, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9851851851851852, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 1.0, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.08211695402860641, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.9435382685069008, - "eval_overall_precision": 0.9423558897243107, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6144, - "eval_samples_per_second": 276.708, - "eval_steps_per_second": 4.883, - "step": 1632 - }, - { - "epoch": 18.0, - "grad_norm": 0.005733116064220667, - "learning_rate": 4.1e-05, - "loss": 0.0024, - "step": 1728 - }, - { - "epoch": 18.0, - "eval_LOCATION_f1": 0.9166666666666666, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8979591836734694, - "eval_LOCATION_recall": 0.9361702127659575, - "eval_ORGANIZATION_f1": 0.9337349397590362, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9393939393939394, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.9777777777777779, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9924812030075187, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.07464080303907394, - "eval_overall_accuracy": 0.988950276243094, - "eval_overall_f1": 0.9445843828715367, - "eval_overall_precision": 0.946969696969697, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6238, - "eval_samples_per_second": 272.509, - "eval_steps_per_second": 4.809, - "step": 1728 + "eval_runtime": 1.2109, + "eval_samples_per_second": 140.387, + "eval_steps_per_second": 2.477, + "step": 384 }, { - "epoch": 19.0, - "grad_norm": 5.751409530639648, - "learning_rate": 4.05e-05, - "loss": 0.003, - "step": 1824 + "epoch": 5.0, + "grad_norm": 3.4944188594818115, + "learning_rate": 0.0, + "loss": 0.0103, + "step": 480 }, { - "epoch": 19.0, - "eval_LOCATION_f1": 0.9072164948453608, + "epoch": 5.0, + "eval_LOCATION_f1": 0.8934010152284263, "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.88, + "eval_LOCATION_precision": 0.8543689320388349, "eval_LOCATION_recall": 0.9361702127659575, - "eval_ORGANIZATION_f1": 0.9123867069486405, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9207317073170732, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.967032967032967, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9705882352941176, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.0849042758345604, - "eval_overall_accuracy": 0.9859116022099448, - "eval_overall_f1": 0.9298245614035087, - "eval_overall_precision": 0.9275, - "eval_overall_recall": 0.9321608040201005, - "eval_runtime": 0.626, - "eval_samples_per_second": 271.56, - "eval_steps_per_second": 4.792, - "step": 1824 - }, - { - "epoch": 20.0, - "grad_norm": 0.3233324885368347, - "learning_rate": 4e-05, - "loss": 0.0035, - "step": 1920 - }, - { - "epoch": 20.0, - "eval_LOCATION_f1": 0.8969072164948454, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.87, - "eval_LOCATION_recall": 0.925531914893617, - "eval_ORGANIZATION_f1": 0.9300911854103343, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9444444444444444, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.981549815498155, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925373134328358, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.08302651345729828, - "eval_overall_accuracy": 0.9875690607734806, - "eval_overall_f1": 0.9395465994962218, - "eval_overall_precision": 0.9419191919191919, - "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.612, - "eval_samples_per_second": 277.763, - "eval_steps_per_second": 4.902, - "step": 1920 - }, - { - "epoch": 21.0, - "grad_norm": 0.0027803461998701096, - "learning_rate": 3.9500000000000005e-05, - "loss": 0.0015, - "step": 2016 - }, - { - "epoch": 21.0, - "eval_LOCATION_f1": 0.8994708994708994, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8947368421052632, - "eval_LOCATION_recall": 0.9042553191489362, - "eval_ORGANIZATION_f1": 0.9226190476190477, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9171597633136095, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.09646125137805939, - "eval_overall_accuracy": 0.9864640883977901, - "eval_overall_f1": 0.9346733668341709, - "eval_overall_precision": 0.9346733668341709, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6227, - "eval_samples_per_second": 273.012, - "eval_steps_per_second": 4.818, - "step": 2016 - }, - { - "epoch": 22.0, - "grad_norm": 0.012560858391225338, - "learning_rate": 3.9000000000000006e-05, - "loss": 0.0029, - "step": 2112 - }, - { - "epoch": 22.0, - "eval_LOCATION_f1": 0.8994708994708994, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8947368421052632, - "eval_LOCATION_recall": 0.9042553191489362, - "eval_ORGANIZATION_f1": 0.9112426035502958, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9005847953216374, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9565217391304348, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9496402877697842, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.1119152307510376, - "eval_overall_accuracy": 0.9831491712707182, - "eval_overall_f1": 0.9240348692403487, - "eval_overall_precision": 0.9160493827160494, - "eval_overall_recall": 0.9321608040201005, - "eval_runtime": 0.622, - "eval_samples_per_second": 273.291, - "eval_steps_per_second": 4.823, - "step": 2112 - }, - { - "epoch": 23.0, - "grad_norm": 0.015381195582449436, - "learning_rate": 3.85e-05, - "loss": 0.0031, - "step": 2208 - }, - { - "epoch": 23.0, - "eval_LOCATION_f1": 0.934010152284264, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8932038834951457, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.8952380952380953, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9527027027027027, - "eval_ORGANIZATION_recall": 0.844311377245509, - "eval_PERSON_f1": 0.9672727272727273, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9637681159420289, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.10207368433475494, - "eval_overall_accuracy": 0.9842541436464088, - "eval_overall_f1": 0.9301143583227447, - "eval_overall_precision": 0.9408740359897172, - "eval_overall_recall": 0.9195979899497487, - "eval_runtime": 0.6155, - "eval_samples_per_second": 276.214, - "eval_steps_per_second": 4.874, - "step": 2208 - }, - { - "epoch": 24.0, - "grad_norm": 0.029895633459091187, - "learning_rate": 3.8e-05, - "loss": 0.0023, - "step": 2304 - }, - { - "epoch": 24.0, - "eval_LOCATION_f1": 0.9081632653061226, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8725490196078431, - "eval_LOCATION_recall": 0.9468085106382979, - "eval_ORGANIZATION_f1": 0.9123867069486405, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9207317073170732, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9743589743589743, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9779411764705882, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.08727628737688065, - "eval_overall_accuracy": 0.9864640883977901, - "eval_overall_f1": 0.9325, - "eval_overall_precision": 0.927860696517413, - "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.6257, - "eval_samples_per_second": 271.688, - "eval_steps_per_second": 4.795, - "step": 2304 - }, - { - "epoch": 25.0, - "grad_norm": 1.8711739778518677, - "learning_rate": 3.7500000000000003e-05, - "loss": 0.0029, - "step": 2400 - }, - { - "epoch": 25.0, - "eval_LOCATION_f1": 0.9045226130653266, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8571428571428571, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.8975903614457832, + "eval_ORGANIZATION_f1": 0.8895705521472391, "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9030303030303031, - "eval_ORGANIZATION_recall": 0.8922155688622755, - "eval_PERSON_f1": 0.9781021897810219, + "eval_ORGANIZATION_precision": 0.9119496855345912, + "eval_ORGANIZATION_recall": 0.8682634730538922, + "eval_PERSON_f1": 0.9852941176470589, "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9781021897810219, + "eval_PERSON_precision": 0.9925925925925926, "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.10519938915967941, - "eval_overall_accuracy": 0.9856353591160221, - "eval_overall_f1": 0.9267080745341615, - "eval_overall_precision": 0.9164619164619164, - "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.6271, - "eval_samples_per_second": 271.08, - "eval_steps_per_second": 4.784, - "step": 2400 - }, - { - "epoch": 26.0, - "grad_norm": 0.8989447951316833, - "learning_rate": 3.7e-05, - "loss": 0.0033, - "step": 2496 - }, - { - "epoch": 26.0, - "eval_LOCATION_f1": 0.8640776699029127, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.7946428571428571, - "eval_LOCATION_recall": 0.9468085106382979, - "eval_ORGANIZATION_f1": 0.9068322981366459, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9419354838709677, - "eval_ORGANIZATION_recall": 0.874251497005988, - "eval_PERSON_f1": 0.9708029197080292, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9708029197080292, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.12218903750181198, - "eval_overall_accuracy": 0.9828729281767956, - "eval_overall_f1": 0.917705735660848, - "eval_overall_precision": 0.9108910891089109, - "eval_overall_recall": 0.9246231155778895, - "eval_runtime": 0.6123, - "eval_samples_per_second": 277.664, - "eval_steps_per_second": 4.9, - "step": 2496 - }, - { - "epoch": 27.0, - "grad_norm": 0.014491462148725986, - "learning_rate": 3.65e-05, - "loss": 0.0021, - "step": 2592 - }, - { - "epoch": 27.0, - "eval_LOCATION_f1": 0.9035532994923857, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8640776699029126, - "eval_LOCATION_recall": 0.9468085106382979, - "eval_ORGANIZATION_f1": 0.9090909090909091, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9202453987730062, - "eval_ORGANIZATION_recall": 0.8982035928143712, - "eval_PERSON_f1": 0.9708029197080292, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9708029197080292, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.11366433650255203, - "eval_overall_accuracy": 0.9853591160220995, - "eval_overall_f1": 0.9288389513108615, - "eval_overall_precision": 0.9230769230769231, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6239, - "eval_samples_per_second": 272.493, - "eval_steps_per_second": 4.809, - "step": 2592 - }, - { - "epoch": 28.0, - "grad_norm": 0.00040458361036144197, - "learning_rate": 3.6e-05, - "loss": 0.0014, - "step": 2688 + "eval_loss": 0.05397149175405502, + "eval_overall_accuracy": 0.9850828729281768, + "eval_overall_f1": 0.9232704402515725, + "eval_overall_precision": 0.924433249370277, + "eval_overall_recall": 0.9221105527638191, + "eval_runtime": 1.2561, + "eval_samples_per_second": 135.337, + "eval_steps_per_second": 2.388, + "step": 480 }, { - "epoch": 28.0, - "eval_LOCATION_f1": 0.8947368421052632, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8854166666666666, - "eval_LOCATION_recall": 0.9042553191489362, - "eval_ORGANIZATION_f1": 0.9085545722713864, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.8953488372093024, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9703703703703703, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9849624060150376, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.0998811274766922, - "eval_overall_accuracy": 0.9861878453038674, - "eval_overall_f1": 0.9261576971214018, - "eval_overall_precision": 0.9226932668329177, - "eval_overall_recall": 0.9296482412060302, - "eval_runtime": 0.6221, - "eval_samples_per_second": 273.284, - "eval_steps_per_second": 4.823, - "step": 2688 - }, - { - "epoch": 29.0, - "grad_norm": 0.003671834012493491, - "learning_rate": 3.55e-05, - "loss": 0.0017, - "step": 2784 - }, - { - "epoch": 29.0, - "eval_LOCATION_f1": 0.8947368421052632, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8854166666666666, - "eval_LOCATION_recall": 0.9042553191489362, - "eval_ORGANIZATION_f1": 0.8988095238095238, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.893491124260355, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9675090252707581, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9571428571428572, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.09638147801160812, - "eval_overall_accuracy": 0.9842541436464088, - "eval_overall_f1": 0.9215442092154421, - "eval_overall_precision": 0.9135802469135802, - "eval_overall_recall": 0.9296482412060302, - "eval_runtime": 0.6201, - "eval_samples_per_second": 274.15, - "eval_steps_per_second": 4.838, - "step": 2784 - }, - { - "epoch": 30.0, - "grad_norm": 2.668123245239258, - "learning_rate": 3.5e-05, - "loss": 0.0064, - "step": 2880 - }, - { - "epoch": 30.0, - "eval_LOCATION_f1": 0.8947368421052632, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8854166666666666, - "eval_LOCATION_recall": 0.9042553191489362, - "eval_ORGANIZATION_f1": 0.9317507418397626, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9235294117647059, - "eval_ORGANIZATION_recall": 0.9401197604790419, - "eval_PERSON_f1": 0.9777777777777779, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9924812030075187, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.0691131055355072, - "eval_overall_accuracy": 0.9875690607734806, - "eval_overall_f1": 0.9385194479297366, - "eval_overall_precision": 0.9373433583959899, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6245, - "eval_samples_per_second": 272.208, - "eval_steps_per_second": 4.804, - "step": 2880 - }, - { - "epoch": 31.0, - "grad_norm": 0.004453280474990606, - "learning_rate": 3.45e-05, - "loss": 0.0032, - "step": 2976 - }, - { - "epoch": 31.0, - "eval_LOCATION_f1": 0.9166666666666666, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8979591836734694, - "eval_LOCATION_recall": 0.9361702127659575, - "eval_ORGANIZATION_f1": 0.9235294117647059, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9075144508670521, - "eval_ORGANIZATION_recall": 0.9401197604790419, - "eval_PERSON_f1": 0.9851851851851852, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 1.0, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.08723258972167969, - "eval_overall_accuracy": 0.9878453038674033, - "eval_overall_f1": 0.9426433915211971, - "eval_overall_precision": 0.9356435643564357, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6234, - "eval_samples_per_second": 272.695, - "eval_steps_per_second": 4.812, - "step": 2976 - }, - { - "epoch": 32.0, - "grad_norm": 0.0032113208435475826, - "learning_rate": 3.4000000000000007e-05, - "loss": 0.0027, - "step": 3072 - }, - { - "epoch": 32.0, - "eval_LOCATION_f1": 0.9119170984455959, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8888888888888888, - "eval_LOCATION_recall": 0.9361702127659575, - "eval_ORGANIZATION_f1": 0.9198813056379822, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9117647058823529, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.9703703703703703, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9849624060150376, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.09214069694280624, - "eval_overall_accuracy": 0.9861878453038674, - "eval_overall_f1": 0.9349999999999999, - "eval_overall_precision": 0.9303482587064676, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6805, - "eval_samples_per_second": 249.808, - "eval_steps_per_second": 4.408, - "step": 3072 - }, - { - "epoch": 33.0, - "grad_norm": 0.0023403808008879423, - "learning_rate": 3.35e-05, - "loss": 0.0017, - "step": 3168 - }, - { - "epoch": 33.0, - "eval_LOCATION_f1": 0.90625, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8877551020408163, - "eval_LOCATION_recall": 0.925531914893617, - "eval_ORGANIZATION_f1": 0.9333333333333335, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9447852760736196, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.966789667896679, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9776119402985075, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.09554101526737213, - "eval_overall_accuracy": 0.9870165745856354, - "eval_overall_f1": 0.9382093316519546, - "eval_overall_precision": 0.9417721518987342, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6163, - "eval_samples_per_second": 275.837, - "eval_steps_per_second": 4.868, - "step": 3168 - }, - { - "epoch": 34.0, - "grad_norm": 0.0025567917618900537, - "learning_rate": 3.3e-05, - "loss": 0.0027, - "step": 3264 - }, - { - "epoch": 34.0, - "eval_LOCATION_f1": 0.9238578680203046, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.883495145631068, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9393939393939393, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.950920245398773, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.9779411764705882, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9851851851851852, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.10889267921447754, - "eval_overall_accuracy": 0.9867403314917127, - "eval_overall_f1": 0.9486858573216519, - "eval_overall_precision": 0.9451371571072319, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.6143, - "eval_samples_per_second": 276.722, - "eval_steps_per_second": 4.883, - "step": 3264 - }, - { - "epoch": 35.0, - "grad_norm": 0.003604689845815301, - "learning_rate": 3.2500000000000004e-05, - "loss": 0.0024, - "step": 3360 - }, - { - "epoch": 35.0, - "eval_LOCATION_f1": 0.9137055837563451, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8737864077669902, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9365558912386707, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9451219512195121, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.9710144927536232, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9640287769784173, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.09201914817094803, - "eval_overall_accuracy": 0.9870165745856354, - "eval_overall_f1": 0.9427860696517413, - "eval_overall_precision": 0.9334975369458128, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.6128, - "eval_samples_per_second": 277.43, - "eval_steps_per_second": 4.896, - "step": 3360 - }, - { - "epoch": 36.0, - "grad_norm": 0.009372674860060215, - "learning_rate": 3.2000000000000005e-05, - "loss": 0.0022, - "step": 3456 - }, - { - "epoch": 36.0, - "eval_LOCATION_f1": 0.9430051813471503, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9418960244648319, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9625, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9672727272727273, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9637681159420289, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.07915590703487396, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.950943396226415, - "eval_overall_precision": 0.9521410579345088, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6137, - "eval_samples_per_second": 277.022, - "eval_steps_per_second": 4.889, - "step": 3456 - }, - { - "epoch": 37.0, - "grad_norm": 0.008921943604946136, - "learning_rate": 3.15e-05, - "loss": 0.0013, - "step": 3552 - }, - { - "epoch": 37.0, - "eval_LOCATION_f1": 0.9230769230769231, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8910891089108911, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9393939393939393, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.950920245398773, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.981549815498155, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925373134328358, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.09620564430952072, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.949748743718593, - "eval_overall_precision": 0.949748743718593, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6579, - "eval_samples_per_second": 258.391, - "eval_steps_per_second": 4.56, - "step": 3552 - }, - { - "epoch": 38.0, - "grad_norm": 0.0019985612016171217, - "learning_rate": 3.1e-05, - "loss": 0.0028, - "step": 3648 - }, - { - "epoch": 38.0, - "eval_LOCATION_f1": 0.9326424870466321, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9090909090909091, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.923076923076923, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9122807017543859, - "eval_ORGANIZATION_recall": 0.9341317365269461, - "eval_PERSON_f1": 0.9703703703703703, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9849624060150376, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.0810953676700592, - "eval_overall_accuracy": 0.9875690607734806, - "eval_overall_f1": 0.9413233458177278, - "eval_overall_precision": 0.9354838709677419, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6203, - "eval_samples_per_second": 274.044, - "eval_steps_per_second": 4.836, - "step": 3648 - }, - { - "epoch": 39.0, - "grad_norm": 0.0019018716411665082, - "learning_rate": 3.05e-05, - "loss": 0.0022, - "step": 3744 - }, - { - "epoch": 39.0, - "eval_LOCATION_f1": 0.9015544041450778, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8787878787878788, - "eval_LOCATION_recall": 0.925531914893617, - "eval_ORGANIZATION_f1": 0.9161676646706587, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9161676646706587, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9703703703703703, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9849624060150376, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.1018829345703125, - "eval_overall_accuracy": 0.9859116022099448, - "eval_overall_f1": 0.9309912170639899, - "eval_overall_precision": 0.9298245614035088, - "eval_overall_recall": 0.9321608040201005, - "eval_runtime": 0.6176, - "eval_samples_per_second": 275.269, - "eval_steps_per_second": 4.858, - "step": 3744 - }, - { - "epoch": 40.0, - "grad_norm": 0.06729024648666382, - "learning_rate": 3e-05, - "loss": 0.0012, - "step": 3840 - }, - { - "epoch": 40.0, - "eval_LOCATION_f1": 0.9119170984455959, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8888888888888888, - "eval_LOCATION_recall": 0.9361702127659575, - "eval_ORGANIZATION_f1": 0.9181286549707602, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.8971428571428571, - "eval_ORGANIZATION_recall": 0.9401197604790419, - "eval_PERSON_f1": 0.9777777777777779, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9924812030075187, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.096034474670887, - "eval_overall_accuracy": 0.9867403314917127, - "eval_overall_f1": 0.9366459627329192, - "eval_overall_precision": 0.9262899262899262, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.612, - "eval_samples_per_second": 277.787, - "eval_steps_per_second": 4.902, - "step": 3840 - }, - { - "epoch": 41.0, - "grad_norm": 0.0005613254033960402, - "learning_rate": 2.95e-05, - "loss": 0.0008, - "step": 3936 - }, - { - "epoch": 41.0, - "eval_LOCATION_f1": 0.9285714285714286, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8921568627450981, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9382716049382717, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9681528662420382, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9779411764705882, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9851851851851852, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.0963701382279396, - "eval_overall_accuracy": 0.988950276243094, - "eval_overall_f1": 0.9494949494949495, - "eval_overall_precision": 0.9543147208121827, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6226, - "eval_samples_per_second": 273.045, - "eval_steps_per_second": 4.818, - "step": 3936 - }, - { - "epoch": 42.0, - "grad_norm": 0.0030692138243466616, - "learning_rate": 2.9e-05, - "loss": 0.0015, - "step": 4032 - }, - { - "epoch": 42.0, - "eval_LOCATION_f1": 0.9319371727748691, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.9468085106382979, - "eval_ORGANIZATION_f1": 0.9074626865671641, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9047619047619048, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9708029197080292, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9708029197080292, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.07825793325901031, - "eval_overall_accuracy": 0.9881215469613259, - "eval_overall_f1": 0.9349999999999999, - "eval_overall_precision": 0.9303482587064676, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6264, - "eval_samples_per_second": 271.375, - "eval_steps_per_second": 4.789, - "step": 4032 - }, - { - "epoch": 43.0, - "grad_norm": 0.0004494467575568706, - "learning_rate": 2.8499999999999998e-05, - "loss": 0.0019, - "step": 4128 - }, - { - "epoch": 43.0, - "eval_LOCATION_f1": 0.934010152284264, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8932038834951457, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.9272727272727274, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9386503067484663, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.981549815498155, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925373134328358, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.07773718982934952, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.9473684210526316, - "eval_overall_precision": 0.945, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6145, - "eval_samples_per_second": 276.668, - "eval_steps_per_second": 4.882, - "step": 4128 - }, - { - "epoch": 44.0, - "grad_norm": 0.0007905985112302005, - "learning_rate": 2.8000000000000003e-05, - "loss": 0.0011, - "step": 4224 - }, - { - "epoch": 44.0, - "eval_LOCATION_f1": 0.9246231155778896, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8761904761904762, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.9422492401215805, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9567901234567902, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.08259343355894089, - "eval_overall_accuracy": 0.9892265193370166, - "eval_overall_f1": 0.9486858573216519, - "eval_overall_precision": 0.9451371571072319, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.731, - "eval_samples_per_second": 232.542, - "eval_steps_per_second": 4.104, - "step": 4224 - }, - { - "epoch": 45.0, - "grad_norm": 0.00027426957967691123, - "learning_rate": 2.7500000000000004e-05, - "loss": 0.0007, - "step": 4320 - }, - { - "epoch": 45.0, - "eval_LOCATION_f1": 0.9387755102040817, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9019607843137255, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.9259259259259259, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9554140127388535, - "eval_ORGANIZATION_recall": 0.8982035928143712, - "eval_PERSON_f1": 0.9705882352941176, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9777777777777777, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.07949012517929077, - "eval_overall_accuracy": 0.9867403314917127, - "eval_overall_f1": 0.9444444444444445, - "eval_overall_precision": 0.949238578680203, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6776, - "eval_samples_per_second": 250.88, - "eval_steps_per_second": 4.427, - "step": 4320 - }, - { - "epoch": 46.0, - "grad_norm": 6.9263811111450195, - "learning_rate": 2.7000000000000002e-05, - "loss": 0.0025, - "step": 4416 - }, - { - "epoch": 46.0, - "eval_LOCATION_f1": 0.9435897435897436, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9108910891089109, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.9144542772861356, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9011627906976745, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.9739776951672863, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9924242424242424, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.08157689869403839, - "eval_overall_accuracy": 0.9878453038674033, - "eval_overall_f1": 0.9414694894146949, - "eval_overall_precision": 0.9333333333333333, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6189, - "eval_samples_per_second": 274.66, - "eval_steps_per_second": 4.847, - "step": 4416 - }, - { - "epoch": 47.0, - "grad_norm": 0.001023626420646906, - "learning_rate": 2.6500000000000004e-05, - "loss": 0.0012, - "step": 4512 - }, - { - "epoch": 47.0, - "eval_LOCATION_f1": 0.9253731343283582, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9893617021276596, - "eval_ORGANIZATION_f1": 0.9353846153846154, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9620253164556962, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9777777777777779, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9924812030075187, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.09457841515541077, - "eval_overall_accuracy": 0.9875690607734806, - "eval_overall_f1": 0.9472361809045227, - "eval_overall_precision": 0.9472361809045227, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6136, - "eval_samples_per_second": 277.045, - "eval_steps_per_second": 4.889, - "step": 4512 - }, - { - "epoch": 48.0, - "grad_norm": 0.0005152701633051038, - "learning_rate": 2.6000000000000002e-05, - "loss": 0.0016, - "step": 4608 - }, - { - "epoch": 48.0, - "eval_LOCATION_f1": 0.9246231155778896, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8761904761904762, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.924924924924925, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.927710843373494, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9777777777777779, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9924812030075187, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.0960976779460907, - "eval_overall_accuracy": 0.987292817679558, - "eval_overall_f1": 0.9426433915211971, - "eval_overall_precision": 0.9356435643564357, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6176, - "eval_samples_per_second": 275.273, - "eval_steps_per_second": 4.858, - "step": 4608 - }, - { - "epoch": 49.0, - "grad_norm": 0.0012280733790248632, - "learning_rate": 2.5500000000000003e-05, - "loss": 0.001, - "step": 4704 - }, - { - "epoch": 49.0, - "eval_LOCATION_f1": 0.9333333333333335, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.900990099009901, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9285714285714287, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9230769230769231, - "eval_ORGANIZATION_recall": 0.9341317365269461, - "eval_PERSON_f1": 0.981549815498155, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925373134328358, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.09950720518827438, - "eval_overall_accuracy": 0.9864640883977901, - "eval_overall_f1": 0.9476309226932669, - "eval_overall_precision": 0.9405940594059405, - "eval_overall_recall": 0.9547738693467337, - "eval_runtime": 0.6234, - "eval_samples_per_second": 272.715, - "eval_steps_per_second": 4.813, - "step": 4704 - }, - { - "epoch": 50.0, - "grad_norm": 0.00443949643522501, - "learning_rate": 2.5e-05, - "loss": 0.001, - "step": 4800 - }, - { - "epoch": 50.0, - "eval_LOCATION_f1": 0.934010152284264, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8932038834951457, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.9259259259259259, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9554140127388535, - "eval_ORGANIZATION_recall": 0.8982035928143712, - "eval_PERSON_f1": 0.9703703703703703, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9849624060150376, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.0988643616437912, - "eval_overall_accuracy": 0.9870165745856354, - "eval_overall_f1": 0.943109987357775, - "eval_overall_precision": 0.9491094147582697, - "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.6234, - "eval_samples_per_second": 272.715, - "eval_steps_per_second": 4.813, - "step": 4800 - }, - { - "epoch": 51.0, - "grad_norm": 0.0002302059147041291, - "learning_rate": 2.45e-05, - "loss": 0.0004, - "step": 4896 - }, - { - "epoch": 51.0, - "eval_LOCATION_f1": 0.9387755102040817, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9019607843137255, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.9151515151515152, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9263803680981595, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9743589743589743, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9779411764705882, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.11296043545007706, - "eval_overall_accuracy": 0.9870165745856354, - "eval_overall_f1": 0.9411764705882354, - "eval_overall_precision": 0.9376558603491272, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6148, - "eval_samples_per_second": 276.493, - "eval_steps_per_second": 4.879, - "step": 4896 - }, - { - "epoch": 52.0, - "grad_norm": 0.0004114691982977092, - "learning_rate": 2.4e-05, - "loss": 0.0007, - "step": 4992 - }, - { - "epoch": 52.0, - "eval_LOCATION_f1": 0.9381443298969071, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.91, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9212121212121211, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9325153374233128, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9743589743589743, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9779411764705882, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.10785677284002304, - "eval_overall_accuracy": 0.9878453038674033, - "eval_overall_f1": 0.9435382685069008, - "eval_overall_precision": 0.9423558897243107, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6113, - "eval_samples_per_second": 278.074, - "eval_steps_per_second": 4.907, - "step": 4992 - }, - { - "epoch": 53.0, - "grad_norm": 0.0016004899516701698, - "learning_rate": 2.35e-05, - "loss": 0.0011, - "step": 5088 - }, - { - "epoch": 53.0, - "eval_LOCATION_f1": 0.9479166666666666, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9285714285714286, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9235474006116209, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.94375, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9705882352941176, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9777777777777777, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10214179754257202, - "eval_overall_accuracy": 0.9878453038674033, - "eval_overall_f1": 0.9456384323640961, - "eval_overall_precision": 0.9516539440203562, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6154, - "eval_samples_per_second": 276.238, - "eval_steps_per_second": 4.875, - "step": 5088 - }, - { - "epoch": 54.0, - "grad_norm": 0.00200115074403584, - "learning_rate": 2.3000000000000003e-05, - "loss": 0.0009, - "step": 5184 - }, - { - "epoch": 54.0, - "eval_LOCATION_f1": 0.8958333333333333, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.8775510204081632, - "eval_LOCATION_recall": 0.9148936170212766, - "eval_ORGANIZATION_f1": 0.8979591836734695, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.875, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9705882352941176, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9777777777777777, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.12426385283470154, - "eval_overall_accuracy": 0.9825966850828729, - "eval_overall_f1": 0.9219330855018587, - "eval_overall_precision": 0.9095354523227384, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6204, - "eval_samples_per_second": 273.999, - "eval_steps_per_second": 4.835, - "step": 5184 - }, - { - "epoch": 55.0, - "grad_norm": 0.006822248920798302, - "learning_rate": 2.25e-05, - "loss": 0.0011, - "step": 5280 - }, - { - "epoch": 55.0, - "eval_LOCATION_f1": 0.9381443298969071, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.91, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9161676646706587, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9161676646706587, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.08817728608846664, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.9411764705882354, - "eval_overall_precision": 0.9376558603491272, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6164, - "eval_samples_per_second": 275.795, - "eval_steps_per_second": 4.867, - "step": 5280 - }, - { - "epoch": 56.0, - "grad_norm": 0.002012253738939762, - "learning_rate": 2.2000000000000003e-05, - "loss": 0.0004, - "step": 5376 - }, - { - "epoch": 56.0, - "eval_LOCATION_f1": 0.9479166666666666, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9285714285714286, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9235474006116209, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.94375, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.08795258402824402, - "eval_overall_accuracy": 0.9900552486187846, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6131, - "eval_samples_per_second": 277.291, - "eval_steps_per_second": 4.893, - "step": 5376 - }, - { - "epoch": 57.0, - "grad_norm": 0.001478194841183722, - "learning_rate": 2.15e-05, - "loss": 0.0006, - "step": 5472 - }, - { - "epoch": 57.0, - "eval_LOCATION_f1": 0.9333333333333335, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.900990099009901, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.913946587537092, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9058823529411765, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10102926194667816, - "eval_overall_accuracy": 0.987292817679558, - "eval_overall_f1": 0.9389788293897883, - "eval_overall_precision": 0.9308641975308642, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6159, - "eval_samples_per_second": 276.031, - "eval_steps_per_second": 4.871, - "step": 5472 - }, - { - "epoch": 58.0, - "grad_norm": 0.0009641240467317402, - "learning_rate": 2.1e-05, - "loss": 0.0006, - "step": 5568 - }, - { - "epoch": 58.0, - "eval_LOCATION_f1": 0.9333333333333335, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.900990099009901, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9166666666666667, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9112426035502958, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9779411764705882, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9851851851851852, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.09803132712841034, - "eval_overall_accuracy": 0.9875690607734806, - "eval_overall_f1": 0.9414694894146949, - "eval_overall_precision": 0.9333333333333333, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.7109, - "eval_samples_per_second": 239.12, - "eval_steps_per_second": 4.22, - "step": 5568 - }, - { - "epoch": 59.0, - "grad_norm": 0.00035870648571290076, - "learning_rate": 2.05e-05, - "loss": 0.0003, - "step": 5664 - }, - { - "epoch": 59.0, - "eval_LOCATION_f1": 0.9430051813471503, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9263803680981595, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.949685534591195, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9705882352941176, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9777777777777777, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.09932650625705719, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.9456384323640961, - "eval_overall_precision": 0.9516539440203562, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6278, - "eval_samples_per_second": 270.773, - "eval_steps_per_second": 4.778, - "step": 5664 - }, - { - "epoch": 60.0, - "grad_norm": 0.005340063478797674, - "learning_rate": 2e-05, - "loss": 0.0003, - "step": 5760 - }, - { - "epoch": 60.0, - "eval_LOCATION_f1": 0.9278350515463918, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9216867469879517, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9272727272727272, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9779411764705882, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9851851851851852, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.09833351522684097, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.9423558897243106, - "eval_overall_precision": 0.94, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6201, - "eval_samples_per_second": 274.141, - "eval_steps_per_second": 4.838, - "step": 5760 - }, - { - "epoch": 61.0, - "grad_norm": 0.0003080039459746331, - "learning_rate": 1.9500000000000003e-05, - "loss": 0.0003, - "step": 5856 - }, - { - "epoch": 61.0, - "eval_LOCATION_f1": 0.9381443298969071, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.91, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.906906906906907, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9096385542168675, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.09371061623096466, - "eval_overall_accuracy": 0.9886740331491712, - "eval_overall_f1": 0.93734335839599, - "eval_overall_precision": 0.935, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6239, - "eval_samples_per_second": 272.497, - "eval_steps_per_second": 4.809, - "step": 5856 - }, - { - "epoch": 62.0, - "grad_norm": 0.0016579064540565014, - "learning_rate": 1.9e-05, - "loss": 0.0011, - "step": 5952 - }, - { - "epoch": 62.0, - "eval_LOCATION_f1": 0.9238578680203046, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.883495145631068, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9263803680981595, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.949685534591195, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.981549815498155, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925373134328358, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.1108747124671936, - "eval_overall_accuracy": 0.9867403314917127, - "eval_overall_f1": 0.9445843828715367, - "eval_overall_precision": 0.946969696969697, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6246, - "eval_samples_per_second": 272.177, - "eval_steps_per_second": 4.803, - "step": 5952 - }, - { - "epoch": 63.0, - "grad_norm": 0.0014240954769775271, - "learning_rate": 1.85e-05, - "loss": 0.0009, - "step": 6048 - }, - { - "epoch": 63.0, - "eval_LOCATION_f1": 0.9479166666666666, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9285714285714286, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9161676646706587, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9161676646706587, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9777777777777779, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9924812030075187, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.0866396427154541, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.9447236180904522, - "eval_overall_precision": 0.9447236180904522, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6176, - "eval_samples_per_second": 275.269, - "eval_steps_per_second": 4.858, - "step": 6048 - }, - { - "epoch": 64.0, - "grad_norm": 0.0007542133680544794, - "learning_rate": 1.8e-05, - "loss": 0.0004, - "step": 6144 - }, - { - "epoch": 64.0, - "eval_LOCATION_f1": 0.9072164948453608, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.88, - "eval_LOCATION_recall": 0.9361702127659575, - "eval_ORGANIZATION_f1": 0.9221556886227545, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9221556886227545, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9777777777777779, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9924812030075187, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.1202077716588974, - "eval_overall_accuracy": 0.9848066298342542, - "eval_overall_f1": 0.93734335839599, - "eval_overall_precision": 0.935, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.628, - "eval_samples_per_second": 270.7, - "eval_steps_per_second": 4.777, - "step": 6144 - }, - { - "epoch": 65.0, - "grad_norm": 0.0002191825769841671, - "learning_rate": 1.75e-05, - "loss": 0.0023, - "step": 6240 - }, - { - "epoch": 65.0, - "eval_LOCATION_f1": 0.9533678756476683, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9292929292929293, - "eval_LOCATION_recall": 0.9787234042553191, - "eval_ORGANIZATION_f1": 0.9189189189189191, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9216867469879518, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9852941176470589, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925925925925926, - "eval_PERSON_recall": 0.9781021897810219, - "eval_loss": 0.09679195284843445, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.9498746867167919, - "eval_overall_precision": 0.9475, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.6251, - "eval_samples_per_second": 271.962, - "eval_steps_per_second": 4.799, - "step": 6240 - }, - { - "epoch": 66.0, - "grad_norm": 0.0010986309498548508, - "learning_rate": 1.7000000000000003e-05, - "loss": 0.0017, - "step": 6336 - }, - { - "epoch": 66.0, - "eval_LOCATION_f1": 0.9430051813471503, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9300911854103343, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9444444444444444, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.981549815498155, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925373134328358, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.10312038660049438, - "eval_overall_accuracy": 0.9875690607734806, - "eval_overall_f1": 0.9508196721311476, - "eval_overall_precision": 0.9544303797468354, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.7092, - "eval_samples_per_second": 239.702, - "eval_steps_per_second": 4.23, - "step": 6336 - }, - { - "epoch": 67.0, - "grad_norm": 0.0005643566255457699, - "learning_rate": 1.65e-05, - "loss": 0.0014, - "step": 6432 - }, - { - "epoch": 67.0, - "eval_LOCATION_f1": 0.9326424870466321, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9090909090909091, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9263803680981595, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.949685534591195, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.981549815498155, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9925373134328358, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.10499503463506699, - "eval_overall_accuracy": 0.9881215469613259, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6298, - "eval_samples_per_second": 269.943, - "eval_steps_per_second": 4.764, - "step": 6432 - }, - { - "epoch": 68.0, - "grad_norm": 0.0005254722782410681, - "learning_rate": 1.6000000000000003e-05, - "loss": 0.0007, - "step": 6528 - }, - { - "epoch": 68.0, - "eval_LOCATION_f1": 0.9430051813471503, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9259259259259259, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9554140127388535, - "eval_ORGANIZATION_recall": 0.8982035928143712, - "eval_PERSON_f1": 0.9851851851851852, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 1.0, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.10490843653678894, - "eval_overall_accuracy": 0.9892265193370166, - "eval_overall_f1": 0.9504447268106734, - "eval_overall_precision": 0.961439588688946, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.61, - "eval_samples_per_second": 278.701, - "eval_steps_per_second": 4.918, - "step": 6528 - }, - { - "epoch": 69.0, - "grad_norm": 0.018979080021381378, - "learning_rate": 1.55e-05, - "loss": 0.0005, - "step": 6624 - }, - { - "epoch": 69.0, - "eval_LOCATION_f1": 0.9375000000000001, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9183673469387755, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9166666666666667, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9112426035502958, - "eval_ORGANIZATION_recall": 0.9221556886227545, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.09969516843557358, - "eval_overall_accuracy": 0.9859116022099448, - "eval_overall_f1": 0.9411764705882354, - "eval_overall_precision": 0.9376558603491272, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6163, - "eval_samples_per_second": 275.818, - "eval_steps_per_second": 4.867, - "step": 6624 - }, - { - "epoch": 70.0, - "grad_norm": 0.00027675795718096197, - "learning_rate": 1.5e-05, - "loss": 0.001, - "step": 6720 - }, - { - "epoch": 70.0, - "eval_LOCATION_f1": 0.9375000000000001, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9183673469387755, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9198813056379822, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9117647058823529, - "eval_ORGANIZATION_recall": 0.9281437125748503, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.1053897961974144, - "eval_overall_accuracy": 0.9875690607734806, - "eval_overall_f1": 0.9425, - "eval_overall_precision": 0.9378109452736318, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6397, - "eval_samples_per_second": 265.745, - "eval_steps_per_second": 4.69, - "step": 6720 - }, - { - "epoch": 71.0, - "grad_norm": 0.00024086404300760478, - "learning_rate": 1.45e-05, - "loss": 0.0005, - "step": 6816 - }, - { - "epoch": 71.0, - "eval_LOCATION_f1": 0.9430051813471503, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9357798165137615, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.95625, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.09782951325178146, - "eval_overall_accuracy": 0.9900552486187846, - "eval_overall_f1": 0.9506953223767383, - "eval_overall_precision": 0.9567430025445293, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6144, - "eval_samples_per_second": 276.707, - "eval_steps_per_second": 4.883, - "step": 6816 - }, - { - "epoch": 72.0, - "grad_norm": 0.005460801534354687, - "learning_rate": 1.4000000000000001e-05, - "loss": 0.0008, - "step": 6912 - }, - { - "epoch": 72.0, - "eval_LOCATION_f1": 0.9368421052631578, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9270833333333334, - "eval_LOCATION_recall": 0.9468085106382979, - "eval_ORGANIZATION_f1": 0.9272727272727274, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9386503067484663, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.0954757034778595, - "eval_overall_accuracy": 0.9886740331491712, - "eval_overall_f1": 0.9456384323640961, - "eval_overall_precision": 0.9516539440203562, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6066, - "eval_samples_per_second": 280.257, - "eval_steps_per_second": 4.946, - "step": 6912 - }, - { - "epoch": 73.0, - "grad_norm": 0.0004871623241342604, - "learning_rate": 1.3500000000000001e-05, - "loss": 0.0005, - "step": 7008 - }, - { - "epoch": 73.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9329268292682927, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9503105590062112, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10084227472543716, - "eval_overall_accuracy": 0.9892265193370166, - "eval_overall_f1": 0.949367088607595, - "eval_overall_precision": 0.9566326530612245, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6223, - "eval_samples_per_second": 273.189, - "eval_steps_per_second": 4.821, - "step": 7008 - }, - { - "epoch": 74.0, - "grad_norm": 0.00226827641017735, - "learning_rate": 1.3000000000000001e-05, - "loss": 0.0004, - "step": 7104 - }, - { - "epoch": 74.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.929663608562691, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.95, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.966789667896679, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9776119402985075, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.1032579094171524, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.9455006337135614, - "eval_overall_precision": 0.9539641943734015, - "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.657, - "eval_samples_per_second": 258.764, - "eval_steps_per_second": 4.566, - "step": 7104 - }, - { - "epoch": 75.0, - "grad_norm": 0.0031442521139979362, - "learning_rate": 1.25e-05, - "loss": 0.0005, - "step": 7200 - }, - { - "epoch": 75.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9320987654320988, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9617834394904459, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9703703703703703, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9849624060150376, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.11296577751636505, - "eval_overall_accuracy": 0.9886740331491712, - "eval_overall_f1": 0.9477707006369427, - "eval_overall_precision": 0.9612403100775194, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6204, - "eval_samples_per_second": 274.019, - "eval_steps_per_second": 4.836, - "step": 7200 - }, - { - "epoch": 76.0, - "grad_norm": 0.001323927310295403, - "learning_rate": 1.2e-05, - "loss": 0.0007, - "step": 7296 - }, - { - "epoch": 76.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9244712990936556, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9329268292682927, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.966789667896679, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9776119402985075, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.1114824041724205, - "eval_overall_accuracy": 0.9883977900552486, - "eval_overall_f1": 0.9432534678436318, - "eval_overall_precision": 0.9468354430379747, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6209, - "eval_samples_per_second": 273.776, - "eval_steps_per_second": 4.831, - "step": 7296 - }, - { - "epoch": 77.0, - "grad_norm": 0.003065042197704315, - "learning_rate": 1.1500000000000002e-05, - "loss": 0.0006, - "step": 7392 - }, - { - "epoch": 77.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9259259259259259, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9554140127388535, - "eval_ORGANIZATION_recall": 0.8982035928143712, - "eval_PERSON_f1": 0.9779411764705882, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9851851851851852, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.112979955971241, - "eval_overall_accuracy": 0.988950276243094, - "eval_overall_f1": 0.9479034307496823, - "eval_overall_precision": 0.9588688946015425, - "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.6315, - "eval_samples_per_second": 269.218, - "eval_steps_per_second": 4.751, - "step": 7392 - }, - { - "epoch": 78.0, - "grad_norm": 7.085573196411133, - "learning_rate": 1.1000000000000001e-05, - "loss": 0.0005, - "step": 7488 - }, - { - "epoch": 78.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9141104294478527, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9371069182389937, - "eval_ORGANIZATION_recall": 0.8922155688622755, - "eval_PERSON_f1": 0.9632352941176471, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9703703703703703, - "eval_PERSON_recall": 0.9562043795620438, - "eval_loss": 0.11508027464151382, - "eval_overall_accuracy": 0.9875690607734806, - "eval_overall_f1": 0.9378960709759189, - "eval_overall_precision": 0.9462915601023018, - "eval_overall_recall": 0.9296482412060302, - "eval_runtime": 0.6139, - "eval_samples_per_second": 276.907, - "eval_steps_per_second": 4.887, - "step": 7488 - }, - { - "epoch": 79.0, - "grad_norm": 0.0023044480476528406, - "learning_rate": 1.05e-05, - "loss": 0.0003, - "step": 7584 - }, - { - "epoch": 79.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9268292682926829, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9440993788819876, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.11096746474504471, - "eval_overall_accuracy": 0.9886740331491712, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6643, - "eval_samples_per_second": 255.917, - "eval_steps_per_second": 4.516, - "step": 7584 - }, - { - "epoch": 80.0, - "grad_norm": 0.00011201861343579367, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 7680 - }, - { - "epoch": 80.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9268292682926829, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9440993788819876, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.11268793046474457, - "eval_overall_accuracy": 0.9886740331491712, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6122, - "eval_samples_per_second": 277.679, - "eval_steps_per_second": 4.9, - "step": 7680 - }, - { - "epoch": 81.0, - "grad_norm": 0.0002784592506941408, - "learning_rate": 9.5e-06, - "loss": 0.0003, - "step": 7776 - }, - { - "epoch": 81.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9268292682926829, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9440993788819876, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.11348237097263336, - "eval_overall_accuracy": 0.9886740331491712, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6095, - "eval_samples_per_second": 278.928, - "eval_steps_per_second": 4.922, - "step": 7776 - }, - { - "epoch": 82.0, - "grad_norm": 0.00022091029677540064, - "learning_rate": 9e-06, - "loss": 0.0003, - "step": 7872 - }, - { - "epoch": 82.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9268292682926829, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9440993788819876, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10818831622600555, - "eval_overall_accuracy": 0.9886740331491712, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6203, - "eval_samples_per_second": 274.071, - "eval_steps_per_second": 4.837, - "step": 7872 - }, - { - "epoch": 83.0, - "grad_norm": 0.00032480747904628515, - "learning_rate": 8.500000000000002e-06, - "loss": 0.0006, - "step": 7968 - }, - { - "epoch": 83.0, - "eval_LOCATION_f1": 0.9430051813471503, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.937888198757764, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9741935483870968, - "eval_ORGANIZATION_recall": 0.9041916167664671, - "eval_PERSON_f1": 0.9779411764705882, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9851851851851852, - "eval_PERSON_recall": 0.9708029197080292, - "eval_loss": 0.11552965641021729, - "eval_overall_accuracy": 0.9895027624309393, - "eval_overall_f1": 0.9529860228716646, - "eval_overall_precision": 0.9640102827763496, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.623, - "eval_samples_per_second": 272.886, - "eval_steps_per_second": 4.816, - "step": 7968 - }, - { - "epoch": 84.0, - "grad_norm": 0.0005326655227690935, - "learning_rate": 8.000000000000001e-06, - "loss": 0.0003, - "step": 8064 - }, - { - "epoch": 84.0, - "eval_LOCATION_f1": 0.9479166666666666, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9285714285714286, - "eval_LOCATION_recall": 0.9680851063829787, - "eval_ORGANIZATION_f1": 0.9386503067484663, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9622641509433962, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10718917846679688, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.9531051964512041, - "eval_overall_precision": 0.9616368286445013, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.616, - "eval_samples_per_second": 275.964, - "eval_steps_per_second": 4.87, - "step": 8064 - }, - { - "epoch": 85.0, - "grad_norm": 0.0004018662730231881, - "learning_rate": 7.5e-06, - "loss": 0.0007, - "step": 8160 - }, - { - "epoch": 85.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9272727272727274, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9386503067484663, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10133817046880722, - "eval_overall_accuracy": 0.9895027624309393, - "eval_overall_f1": 0.946969696969697, - "eval_overall_precision": 0.9517766497461929, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6043, - "eval_samples_per_second": 281.311, - "eval_steps_per_second": 4.964, - "step": 8160 - }, - { - "epoch": 86.0, - "grad_norm": 0.002058778889477253, - "learning_rate": 7.000000000000001e-06, - "loss": 0.0003, - "step": 8256 - }, - { - "epoch": 86.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9300911854103343, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9444444444444444, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10222224146127701, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.9481668773704173, - "eval_overall_precision": 0.9541984732824428, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6385, - "eval_samples_per_second": 266.268, - "eval_steps_per_second": 4.699, - "step": 8256 - }, - { - "epoch": 87.0, - "grad_norm": 0.00029380357591435313, - "learning_rate": 6.5000000000000004e-06, - "loss": 0.0002, - "step": 8352 - }, - { - "epoch": 87.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9300911854103343, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9444444444444444, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10264620184898376, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.9481668773704173, - "eval_overall_precision": 0.9541984732824428, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6718, - "eval_samples_per_second": 253.056, - "eval_steps_per_second": 4.466, - "step": 8352 - }, - { - "epoch": 88.0, - "grad_norm": 0.00025997136253863573, - "learning_rate": 6e-06, - "loss": 0.0002, - "step": 8448 - }, - { - "epoch": 88.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9300911854103343, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9444444444444444, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10285894572734833, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.9481668773704173, - "eval_overall_precision": 0.9541984732824428, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6216, - "eval_samples_per_second": 273.509, - "eval_steps_per_second": 4.827, - "step": 8448 - }, - { - "epoch": 89.0, - "grad_norm": 0.19176463782787323, - "learning_rate": 5.500000000000001e-06, - "loss": 0.0003, - "step": 8544 - }, - { - "epoch": 89.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9300911854103343, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9444444444444444, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.1034075915813446, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.9481668773704173, - "eval_overall_precision": 0.9541984732824428, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6284, - "eval_samples_per_second": 270.534, - "eval_steps_per_second": 4.774, - "step": 8544 - }, - { - "epoch": 90.0, - "grad_norm": 9.726906137075275e-05, - "learning_rate": 5e-06, - "loss": 0.0003, - "step": 8640 - }, - { - "epoch": 90.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9357798165137615, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.95625, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10428432375192642, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.9505703422053232, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6211, - "eval_samples_per_second": 273.713, - "eval_steps_per_second": 4.83, - "step": 8640 - }, - { - "epoch": 91.0, - "grad_norm": 0.0001318985887337476, - "learning_rate": 4.5e-06, - "loss": 0.0002, - "step": 8736 - }, - { - "epoch": 91.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.929663608562691, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.95, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10446962714195251, - "eval_overall_accuracy": 0.9892265193370166, - "eval_overall_f1": 0.9480354879594423, - "eval_overall_precision": 0.9565217391304348, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6186, - "eval_samples_per_second": 274.801, - "eval_steps_per_second": 4.849, - "step": 8736 - }, - { - "epoch": 92.0, - "grad_norm": 0.0003957097651436925, - "learning_rate": 4.000000000000001e-06, - "loss": 0.0002, - "step": 8832 - }, - { - "epoch": 92.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9329268292682927, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9503105590062112, - "eval_ORGANIZATION_recall": 0.9161676646706587, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10467536002397537, - "eval_overall_accuracy": 0.9895027624309393, - "eval_overall_f1": 0.949367088607595, - "eval_overall_precision": 0.9566326530612245, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6214, - "eval_samples_per_second": 273.573, - "eval_steps_per_second": 4.828, - "step": 8832 - }, - { - "epoch": 93.0, - "grad_norm": 0.00048287183744832873, - "learning_rate": 3.5000000000000004e-06, - "loss": 0.0003, - "step": 8928 - }, - { - "epoch": 93.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.929663608562691, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.95, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10538303107023239, - "eval_overall_accuracy": 0.9897790055248619, - "eval_overall_f1": 0.9480354879594423, - "eval_overall_precision": 0.9565217391304348, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6144, - "eval_samples_per_second": 276.699, - "eval_steps_per_second": 4.883, - "step": 8928 - }, - { - "epoch": 94.0, - "grad_norm": 0.00010792938701342791, - "learning_rate": 3e-06, - "loss": 0.0009, - "step": 9024 - }, - { - "epoch": 94.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9325153374233129, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9559748427672956, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10885108262300491, - "eval_overall_accuracy": 0.9895027624309393, - "eval_overall_f1": 0.9492385786802031, - "eval_overall_precision": 0.958974358974359, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6137, - "eval_samples_per_second": 277.001, - "eval_steps_per_second": 4.888, - "step": 9024 - }, - { - "epoch": 95.0, - "grad_norm": 0.00041213424992747605, - "learning_rate": 2.5e-06, - "loss": 0.0004, - "step": 9120 - }, - { - "epoch": 95.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9268292682926829, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9440993788819876, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10332932323217392, - "eval_overall_accuracy": 0.9895027624309393, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6203, - "eval_samples_per_second": 274.044, - "eval_steps_per_second": 4.836, - "step": 9120 - }, - { - "epoch": 96.0, - "grad_norm": 0.0002837861829902977, - "learning_rate": 2.0000000000000003e-06, - "loss": 0.0002, - "step": 9216 - }, - { - "epoch": 96.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9268292682926829, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9440993788819876, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10279857367277145, - "eval_overall_accuracy": 0.9895027624309393, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6235, - "eval_samples_per_second": 272.654, - "eval_steps_per_second": 4.812, - "step": 9216 - }, - { - "epoch": 97.0, - "grad_norm": 0.0002584067406132817, - "learning_rate": 1.5e-06, - "loss": 0.0002, - "step": 9312 - }, - { - "epoch": 97.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.9268292682926829, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.9440993788819876, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10300398617982864, - "eval_overall_accuracy": 0.9895027624309393, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6161, - "eval_samples_per_second": 275.938, - "eval_steps_per_second": 4.869, - "step": 9312 - }, - { - "epoch": 98.0, - "grad_norm": 0.00020491515169851482, - "learning_rate": 1.0000000000000002e-06, - "loss": 0.0003, - "step": 9408 - }, - { - "epoch": 98.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.929663608562691, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.95, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10345587879419327, - "eval_overall_accuracy": 0.9892265193370166, - "eval_overall_f1": 0.9480354879594423, - "eval_overall_precision": 0.9565217391304348, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6258, - "eval_samples_per_second": 271.659, - "eval_steps_per_second": 4.794, - "step": 9408 - }, - { - "epoch": 99.0, - "grad_norm": 0.0002451244508847594, - "learning_rate": 5.000000000000001e-07, - "loss": 0.0002, - "step": 9504 - }, - { - "epoch": 99.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.929663608562691, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.95, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10355650633573532, - "eval_overall_accuracy": 0.9892265193370166, - "eval_overall_f1": 0.9480354879594423, - "eval_overall_precision": 0.9565217391304348, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6161, - "eval_samples_per_second": 275.943, - "eval_steps_per_second": 4.87, - "step": 9504 - }, - { - "epoch": 100.0, - "grad_norm": 0.0001467197434976697, - "learning_rate": 0.0, - "loss": 0.0002, - "step": 9600 - }, - { - "epoch": 100.0, - "eval_LOCATION_f1": 0.9424083769633509, - "eval_LOCATION_number": 94, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.9574468085106383, - "eval_ORGANIZATION_f1": 0.929663608562691, - "eval_ORGANIZATION_number": 167, - "eval_ORGANIZATION_precision": 0.95, - "eval_ORGANIZATION_recall": 0.9101796407185628, - "eval_PERSON_f1": 0.9741697416974171, - "eval_PERSON_number": 137, - "eval_PERSON_precision": 0.9850746268656716, - "eval_PERSON_recall": 0.9635036496350365, - "eval_loss": 0.10357167571783066, - "eval_overall_accuracy": 0.9892265193370166, - "eval_overall_f1": 0.9480354879594423, - "eval_overall_precision": 0.9565217391304348, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6779, - "eval_samples_per_second": 250.774, - "eval_steps_per_second": 4.425, - "step": 9600 - }, - { - "epoch": 100.0, - "step": 9600, - "total_flos": 3851325939318660.0, - "train_loss": 0.005652450745304426, - "train_runtime": 2268.3568, - "train_samples_per_second": 67.45, - "train_steps_per_second": 4.232 + "epoch": 5.0, + "step": 480, + "total_flos": 192740587739700.0, + "train_loss": 0.07472380660474301, + "train_runtime": 222.8624, + "train_samples_per_second": 34.326, + "train_steps_per_second": 2.154 } ], "logging_steps": 500, - "max_steps": 9600, + "max_steps": 480, "num_input_tokens_seen": 0, - "num_train_epochs": 100, + "num_train_epochs": 5, "save_steps": 500, - "total_flos": 3851325939318660.0, + "total_flos": 192740587739700.0, "train_batch_size": 16, "trial_name": null, "trial_params": null