diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 9600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 4.475216865539551, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.2611, + "step": 96 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.9109947643979057, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8969072164948454, + "eval_LOCATION_recall": 0.925531914893617, + "eval_ORGANIZATION_f1": 0.8967551622418879, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.8837209302325582, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9854014598540146, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9854014598540146, + "eval_PERSON_recall": 0.9854014598540146, + "eval_loss": 0.046344444155693054, + "eval_overall_accuracy": 0.9864640883977901, + "eval_overall_f1": 0.9303482587064676, + "eval_overall_precision": 0.9211822660098522, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.5089, + "eval_samples_per_second": 334.076, + "eval_steps_per_second": 5.895, + "step": 96 + }, + { + "epoch": 2.0, + "grad_norm": 5.619458198547363, + "learning_rate": 4.9e-05, + "loss": 0.0645, + "step": 192 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.8490566037735849, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.7627118644067796, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.8597560975609756, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.8757763975155279, + "eval_ORGANIZATION_recall": 0.844311377245509, + "eval_PERSON_f1": 0.981549815498155, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925373134328358, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.0677841454744339, + "eval_overall_accuracy": 0.9779005524861878, + "eval_overall_f1": 0.8976572133168926, + "eval_overall_precision": 0.8813559322033898, + "eval_overall_recall": 0.914572864321608, + "eval_runtime": 0.5311, + "eval_samples_per_second": 320.081, + "eval_steps_per_second": 5.648, + "step": 192 + }, + { + "epoch": 3.0, + "grad_norm": 0.5541238784790039, + "learning_rate": 4.85e-05, + "loss": 0.0354, + "step": 288 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.9278350515463918, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.8988095238095238, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.893491124260355, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9818181818181817, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9782608695652174, + "eval_PERSON_recall": 0.9854014598540146, + "eval_loss": 0.04672340676188469, + "eval_overall_accuracy": 0.9861878453038674, + "eval_overall_f1": 0.9341614906832298, + "eval_overall_precision": 0.9238329238329238, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.5296, + "eval_samples_per_second": 321.026, + "eval_steps_per_second": 5.665, + "step": 288 + }, + { + "epoch": 4.0, + "grad_norm": 2.0090787410736084, + "learning_rate": 4.8e-05, + "loss": 0.0232, + "step": 384 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.9230769230769231, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8910891089108911, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9164086687306501, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.8862275449101796, + "eval_PERSON_f1": 0.9552238805970148, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9770992366412213, + "eval_PERSON_recall": 0.9343065693430657, + "eval_loss": 0.06353317946195602, + "eval_overall_accuracy": 0.9845303867403314, + "eval_overall_f1": 0.9312977099236641, + "eval_overall_precision": 0.9432989690721649, + "eval_overall_recall": 0.9195979899497487, + "eval_runtime": 0.5408, + "eval_samples_per_second": 314.335, + "eval_steps_per_second": 5.547, + "step": 384 + }, + { + "epoch": 5.0, + "grad_norm": 0.37130075693130493, + "learning_rate": 4.75e-05, + "loss": 0.0158, + "step": 480 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.91, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8584905660377359, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9022082018927444, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9533333333333334, + "eval_ORGANIZATION_recall": 0.8562874251497006, + "eval_PERSON_f1": 0.9854014598540146, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9854014598540146, + "eval_PERSON_recall": 0.9854014598540146, + "eval_loss": 0.05297553166747093, + "eval_overall_accuracy": 0.9861878453038674, + "eval_overall_f1": 0.9329962073324906, + "eval_overall_precision": 0.9389312977099237, + "eval_overall_recall": 0.9271356783919598, + "eval_runtime": 0.5568, + "eval_samples_per_second": 305.315, + "eval_steps_per_second": 5.388, + "step": 480 + }, + { + "epoch": 6.0, + "grad_norm": 1.5923467874526978, + "learning_rate": 4.7e-05, + "loss": 0.011, + "step": 576 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.9319371727748691, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9175257731958762, + "eval_LOCATION_recall": 0.9468085106382979, + "eval_ORGANIZATION_f1": 0.9129129129129129, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9816849816849818, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9852941176470589, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.05083903297781944, + "eval_overall_accuracy": 0.9878453038674033, + "eval_overall_f1": 0.9410288582183186, + "eval_overall_precision": 0.9398496240601504, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.5598, + "eval_samples_per_second": 303.698, + "eval_steps_per_second": 5.359, + "step": 576 + }, + { + "epoch": 7.0, + "grad_norm": 2.4925856590270996, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.0086, + "step": 672 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.925531914893617, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.925531914893617, + "eval_LOCATION_recall": 0.925531914893617, + "eval_ORGANIZATION_f1": 0.9176470588235294, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9017341040462428, + "eval_ORGANIZATION_recall": 0.9341317365269461, + "eval_PERSON_f1": 0.9816849816849818, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9852941176470589, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.0657382383942604, + "eval_overall_accuracy": 0.9864640883977901, + "eval_overall_f1": 0.9413233458177278, + "eval_overall_precision": 0.9354838709677419, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.5677, + "eval_samples_per_second": 299.462, + "eval_steps_per_second": 5.285, + "step": 672 + }, + { + "epoch": 8.0, + "grad_norm": 0.08103451132774353, + "learning_rate": 4.600000000000001e-05, + "loss": 0.007, + "step": 768 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.900523560209424, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8865979381443299, + "eval_LOCATION_recall": 0.9148936170212766, + "eval_ORGANIZATION_f1": 0.8988095238095238, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.893491124260355, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9779411764705882, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9851851851851852, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.0754326730966568, + "eval_overall_accuracy": 0.9864640883977901, + "eval_overall_f1": 0.9261576971214018, + "eval_overall_precision": 0.9226932668329177, + "eval_overall_recall": 0.9296482412060302, + "eval_runtime": 0.5838, + "eval_samples_per_second": 291.187, + "eval_steps_per_second": 5.139, + "step": 768 + }, + { + "epoch": 9.0, + "grad_norm": 0.2643495202064514, + "learning_rate": 4.55e-05, + "loss": 0.0061, + "step": 864 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.908108108108108, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9230769230769231, + "eval_LOCATION_recall": 0.8936170212765957, + "eval_ORGANIZATION_f1": 0.9080459770114944, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.8729281767955801, + "eval_ORGANIZATION_recall": 0.9461077844311377, + "eval_PERSON_f1": 0.988929889298893, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 1.0, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.07025933265686035, + "eval_overall_accuracy": 0.9878453038674033, + "eval_overall_f1": 0.9353233830845771, + "eval_overall_precision": 0.9261083743842364, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.5996, + "eval_samples_per_second": 283.509, + "eval_steps_per_second": 5.003, + "step": 864 + }, + { + "epoch": 10.0, + "grad_norm": 0.8626702427864075, + "learning_rate": 4.5e-05, + "loss": 0.0058, + "step": 960 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.9032258064516129, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9130434782608695, + "eval_LOCATION_recall": 0.8936170212765957, + "eval_ORGANIZATION_f1": 0.9011627906976745, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.8757062146892656, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9708029197080292, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9708029197080292, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.06500135362148285, + "eval_overall_accuracy": 0.9867403314917127, + "eval_overall_f1": 0.9253731343283583, + "eval_overall_precision": 0.916256157635468, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.6116, + "eval_samples_per_second": 277.94, + "eval_steps_per_second": 4.905, + "step": 960 + }, + { + "epoch": 11.0, + "grad_norm": 0.020984740927815437, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.0048, + "step": 1056 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.9128205128205128, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8811881188118812, + "eval_LOCATION_recall": 0.9468085106382979, + "eval_ORGANIZATION_f1": 0.9112426035502958, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9005847953216374, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.08488745987415314, + "eval_overall_accuracy": 0.9859116022099448, + "eval_overall_f1": 0.9328358208955224, + "eval_overall_precision": 0.9236453201970444, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6184, + "eval_samples_per_second": 274.893, + "eval_steps_per_second": 4.851, + "step": 1056 + }, + { + "epoch": 12.0, + "grad_norm": 0.3056125044822693, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.0057, + "step": 1152 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.9109947643979057, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8969072164948454, + "eval_LOCATION_recall": 0.925531914893617, + "eval_ORGANIZATION_f1": 0.903225806451613, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.8850574712643678, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.988929889298893, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 1.0, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.072464220225811, + "eval_overall_accuracy": 0.9870165745856354, + "eval_overall_f1": 0.9339975093399752, + "eval_overall_precision": 0.9259259259259259, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.622, + "eval_samples_per_second": 273.318, + "eval_steps_per_second": 4.823, + "step": 1152 + }, + { + "epoch": 13.0, + "grad_norm": 0.08819713443517685, + "learning_rate": 4.35e-05, + "loss": 0.0052, + "step": 1248 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.9130434782608695, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.8936170212765957, + "eval_ORGANIZATION_f1": 0.8895522388059701, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.8869047619047619, + "eval_ORGANIZATION_recall": 0.8922155688622755, + "eval_PERSON_f1": 0.9816849816849818, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9852941176470589, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.08716335147619247, + "eval_overall_accuracy": 0.9845303867403314, + "eval_overall_f1": 0.9267676767676767, + "eval_overall_precision": 0.9314720812182741, + "eval_overall_recall": 0.9221105527638191, + "eval_runtime": 0.6038, + "eval_samples_per_second": 281.559, + "eval_steps_per_second": 4.969, + "step": 1248 + }, + { + "epoch": 14.0, + "grad_norm": 0.0017481500981375575, + "learning_rate": 4.3e-05, + "loss": 0.002, + "step": 1344 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.8972972972972972, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9120879120879121, + "eval_LOCATION_recall": 0.8829787234042553, + "eval_ORGANIZATION_f1": 0.9285714285714287, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9230769230769231, + "eval_ORGANIZATION_recall": 0.9341317365269461, + "eval_PERSON_f1": 0.9816849816849818, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9852941176470589, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.07973892986774445, + "eval_overall_accuracy": 0.9881215469613259, + "eval_overall_f1": 0.9395465994962218, + "eval_overall_precision": 0.9419191919191919, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.6104, + "eval_samples_per_second": 278.491, + "eval_steps_per_second": 4.915, + "step": 1344 + }, + { + "epoch": 15.0, + "grad_norm": 5.35858154296875, + "learning_rate": 4.25e-05, + "loss": 0.0036, + "step": 1440 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.9297297297297297, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.945054945054945, + "eval_LOCATION_recall": 0.9148936170212766, + "eval_ORGANIZATION_f1": 0.9235294117647059, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9075144508670521, + "eval_ORGANIZATION_recall": 0.9401197604790419, + "eval_PERSON_f1": 0.988929889298893, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 1.0, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.08796700835227966, + "eval_overall_accuracy": 0.987292817679558, + "eval_overall_f1": 0.9472361809045227, + "eval_overall_precision": 0.9472361809045227, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.6289, + "eval_samples_per_second": 270.333, + "eval_steps_per_second": 4.771, + "step": 1440 + }, + { + "epoch": 16.0, + "grad_norm": 0.012317053973674774, + "learning_rate": 4.2e-05, + "loss": 0.0026, + "step": 1536 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.8888888888888888, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8842105263157894, + "eval_LOCATION_recall": 0.8936170212765957, + "eval_ORGANIZATION_f1": 0.9305135951661632, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9390243902439024, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9779411764705882, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9851851851851852, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.08714718371629715, + "eval_overall_accuracy": 0.9870165745856354, + "eval_overall_f1": 0.9368686868686869, + "eval_overall_precision": 0.9416243654822335, + "eval_overall_recall": 0.9321608040201005, + "eval_runtime": 0.6145, + "eval_samples_per_second": 276.648, + "eval_steps_per_second": 4.882, + "step": 1536 + }, + { + "epoch": 17.0, + "grad_norm": 0.00884944200515747, + "learning_rate": 4.15e-05, + "loss": 0.0028, + "step": 1632 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.9175257731958764, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.89, + "eval_LOCATION_recall": 0.9468085106382979, + "eval_ORGANIZATION_f1": 0.924924924924925, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.927710843373494, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9851851851851852, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 1.0, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.08211695402860641, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.9435382685069008, + "eval_overall_precision": 0.9423558897243107, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6144, + "eval_samples_per_second": 276.708, + "eval_steps_per_second": 4.883, + "step": 1632 + }, + { + "epoch": 18.0, + "grad_norm": 0.005733116064220667, + "learning_rate": 4.1e-05, + "loss": 0.0024, + "step": 1728 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.9166666666666666, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8979591836734694, + "eval_LOCATION_recall": 0.9361702127659575, + "eval_ORGANIZATION_f1": 0.9337349397590362, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9393939393939394, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9777777777777779, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9924812030075187, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.07464080303907394, + "eval_overall_accuracy": 0.988950276243094, + "eval_overall_f1": 0.9445843828715367, + "eval_overall_precision": 0.946969696969697, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6238, + "eval_samples_per_second": 272.509, + "eval_steps_per_second": 4.809, + "step": 1728 + }, + { + "epoch": 19.0, + "grad_norm": 5.751409530639648, + "learning_rate": 4.05e-05, + "loss": 0.003, + "step": 1824 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.9072164948453608, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.88, + "eval_LOCATION_recall": 0.9361702127659575, + "eval_ORGANIZATION_f1": 0.9123867069486405, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9207317073170732, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.967032967032967, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9705882352941176, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.0849042758345604, + "eval_overall_accuracy": 0.9859116022099448, + "eval_overall_f1": 0.9298245614035087, + "eval_overall_precision": 0.9275, + "eval_overall_recall": 0.9321608040201005, + "eval_runtime": 0.626, + "eval_samples_per_second": 271.56, + "eval_steps_per_second": 4.792, + "step": 1824 + }, + { + "epoch": 20.0, + "grad_norm": 0.3233324885368347, + "learning_rate": 4e-05, + "loss": 0.0035, + "step": 1920 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.8969072164948454, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.87, + "eval_LOCATION_recall": 0.925531914893617, + "eval_ORGANIZATION_f1": 0.9300911854103343, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.981549815498155, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925373134328358, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.08302651345729828, + "eval_overall_accuracy": 0.9875690607734806, + "eval_overall_f1": 0.9395465994962218, + "eval_overall_precision": 0.9419191919191919, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.612, + "eval_samples_per_second": 277.763, + "eval_steps_per_second": 4.902, + "step": 1920 + }, + { + "epoch": 21.0, + "grad_norm": 0.0027803461998701096, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.0015, + "step": 2016 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.8994708994708994, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8947368421052632, + "eval_LOCATION_recall": 0.9042553191489362, + "eval_ORGANIZATION_f1": 0.9226190476190477, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9171597633136095, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.09646125137805939, + "eval_overall_accuracy": 0.9864640883977901, + "eval_overall_f1": 0.9346733668341709, + "eval_overall_precision": 0.9346733668341709, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.6227, + "eval_samples_per_second": 273.012, + "eval_steps_per_second": 4.818, + "step": 2016 + }, + { + "epoch": 22.0, + "grad_norm": 0.012560858391225338, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.0029, + "step": 2112 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.8994708994708994, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8947368421052632, + "eval_LOCATION_recall": 0.9042553191489362, + "eval_ORGANIZATION_f1": 0.9112426035502958, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9005847953216374, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9565217391304348, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9496402877697842, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.1119152307510376, + "eval_overall_accuracy": 0.9831491712707182, + "eval_overall_f1": 0.9240348692403487, + "eval_overall_precision": 0.9160493827160494, + "eval_overall_recall": 0.9321608040201005, + "eval_runtime": 0.622, + "eval_samples_per_second": 273.291, + "eval_steps_per_second": 4.823, + "step": 2112 + }, + { + "epoch": 23.0, + "grad_norm": 0.015381195582449436, + "learning_rate": 3.85e-05, + "loss": 0.0031, + "step": 2208 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.934010152284264, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.8952380952380953, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9527027027027027, + "eval_ORGANIZATION_recall": 0.844311377245509, + "eval_PERSON_f1": 0.9672727272727273, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9637681159420289, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.10207368433475494, + "eval_overall_accuracy": 0.9842541436464088, + "eval_overall_f1": 0.9301143583227447, + "eval_overall_precision": 0.9408740359897172, + "eval_overall_recall": 0.9195979899497487, + "eval_runtime": 0.6155, + "eval_samples_per_second": 276.214, + "eval_steps_per_second": 4.874, + "step": 2208 + }, + { + "epoch": 24.0, + "grad_norm": 0.029895633459091187, + "learning_rate": 3.8e-05, + "loss": 0.0023, + "step": 2304 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.9081632653061226, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8725490196078431, + "eval_LOCATION_recall": 0.9468085106382979, + "eval_ORGANIZATION_f1": 0.9123867069486405, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9207317073170732, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9743589743589743, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9779411764705882, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.08727628737688065, + "eval_overall_accuracy": 0.9864640883977901, + "eval_overall_f1": 0.9325, + "eval_overall_precision": 0.927860696517413, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.6257, + "eval_samples_per_second": 271.688, + "eval_steps_per_second": 4.795, + "step": 2304 + }, + { + "epoch": 25.0, + "grad_norm": 1.8711739778518677, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0029, + "step": 2400 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.9045226130653266, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8571428571428571, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.8975903614457832, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9030303030303031, + "eval_ORGANIZATION_recall": 0.8922155688622755, + "eval_PERSON_f1": 0.9781021897810219, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9781021897810219, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.10519938915967941, + "eval_overall_accuracy": 0.9856353591160221, + "eval_overall_f1": 0.9267080745341615, + "eval_overall_precision": 0.9164619164619164, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.6271, + "eval_samples_per_second": 271.08, + "eval_steps_per_second": 4.784, + "step": 2400 + }, + { + "epoch": 26.0, + "grad_norm": 0.8989447951316833, + "learning_rate": 3.7e-05, + "loss": 0.0033, + "step": 2496 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.8640776699029127, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.7946428571428571, + "eval_LOCATION_recall": 0.9468085106382979, + "eval_ORGANIZATION_f1": 0.9068322981366459, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9419354838709677, + "eval_ORGANIZATION_recall": 0.874251497005988, + "eval_PERSON_f1": 0.9708029197080292, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9708029197080292, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.12218903750181198, + "eval_overall_accuracy": 0.9828729281767956, + "eval_overall_f1": 0.917705735660848, + "eval_overall_precision": 0.9108910891089109, + "eval_overall_recall": 0.9246231155778895, + "eval_runtime": 0.6123, + "eval_samples_per_second": 277.664, + "eval_steps_per_second": 4.9, + "step": 2496 + }, + { + "epoch": 27.0, + "grad_norm": 0.014491462148725986, + "learning_rate": 3.65e-05, + "loss": 0.0021, + "step": 2592 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.9035532994923857, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8640776699029126, + "eval_LOCATION_recall": 0.9468085106382979, + "eval_ORGANIZATION_f1": 0.9090909090909091, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9202453987730062, + "eval_ORGANIZATION_recall": 0.8982035928143712, + "eval_PERSON_f1": 0.9708029197080292, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9708029197080292, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.11366433650255203, + "eval_overall_accuracy": 0.9853591160220995, + "eval_overall_f1": 0.9288389513108615, + "eval_overall_precision": 0.9230769230769231, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.6239, + "eval_samples_per_second": 272.493, + "eval_steps_per_second": 4.809, + "step": 2592 + }, + { + "epoch": 28.0, + "grad_norm": 0.00040458361036144197, + "learning_rate": 3.6e-05, + "loss": 0.0014, + "step": 2688 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.8947368421052632, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8854166666666666, + "eval_LOCATION_recall": 0.9042553191489362, + "eval_ORGANIZATION_f1": 0.9085545722713864, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.8953488372093024, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9703703703703703, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9849624060150376, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.0998811274766922, + "eval_overall_accuracy": 0.9861878453038674, + "eval_overall_f1": 0.9261576971214018, + "eval_overall_precision": 0.9226932668329177, + "eval_overall_recall": 0.9296482412060302, + "eval_runtime": 0.6221, + "eval_samples_per_second": 273.284, + "eval_steps_per_second": 4.823, + "step": 2688 + }, + { + "epoch": 29.0, + "grad_norm": 0.003671834012493491, + "learning_rate": 3.55e-05, + "loss": 0.0017, + "step": 2784 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.8947368421052632, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8854166666666666, + "eval_LOCATION_recall": 0.9042553191489362, + "eval_ORGANIZATION_f1": 0.8988095238095238, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.893491124260355, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9675090252707581, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9571428571428572, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.09638147801160812, + "eval_overall_accuracy": 0.9842541436464088, + "eval_overall_f1": 0.9215442092154421, + "eval_overall_precision": 0.9135802469135802, + "eval_overall_recall": 0.9296482412060302, + "eval_runtime": 0.6201, + "eval_samples_per_second": 274.15, + "eval_steps_per_second": 4.838, + "step": 2784 + }, + { + "epoch": 30.0, + "grad_norm": 2.668123245239258, + "learning_rate": 3.5e-05, + "loss": 0.0064, + "step": 2880 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.8947368421052632, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8854166666666666, + "eval_LOCATION_recall": 0.9042553191489362, + "eval_ORGANIZATION_f1": 0.9317507418397626, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9235294117647059, + "eval_ORGANIZATION_recall": 0.9401197604790419, + "eval_PERSON_f1": 0.9777777777777779, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9924812030075187, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.0691131055355072, + "eval_overall_accuracy": 0.9875690607734806, + "eval_overall_f1": 0.9385194479297366, + "eval_overall_precision": 0.9373433583959899, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6245, + "eval_samples_per_second": 272.208, + "eval_steps_per_second": 4.804, + "step": 2880 + }, + { + "epoch": 31.0, + "grad_norm": 0.004453280474990606, + "learning_rate": 3.45e-05, + "loss": 0.0032, + "step": 2976 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.9166666666666666, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8979591836734694, + "eval_LOCATION_recall": 0.9361702127659575, + "eval_ORGANIZATION_f1": 0.9235294117647059, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9075144508670521, + "eval_ORGANIZATION_recall": 0.9401197604790419, + "eval_PERSON_f1": 0.9851851851851852, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 1.0, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.08723258972167969, + "eval_overall_accuracy": 0.9878453038674033, + "eval_overall_f1": 0.9426433915211971, + "eval_overall_precision": 0.9356435643564357, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.6234, + "eval_samples_per_second": 272.695, + "eval_steps_per_second": 4.812, + "step": 2976 + }, + { + "epoch": 32.0, + "grad_norm": 0.0032113208435475826, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.0027, + "step": 3072 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.9119170984455959, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.9361702127659575, + "eval_ORGANIZATION_f1": 0.9198813056379822, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9117647058823529, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9703703703703703, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9849624060150376, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.09214069694280624, + "eval_overall_accuracy": 0.9861878453038674, + "eval_overall_f1": 0.9349999999999999, + "eval_overall_precision": 0.9303482587064676, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6805, + "eval_samples_per_second": 249.808, + "eval_steps_per_second": 4.408, + "step": 3072 + }, + { + "epoch": 33.0, + "grad_norm": 0.0023403808008879423, + "learning_rate": 3.35e-05, + "loss": 0.0017, + "step": 3168 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.90625, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8877551020408163, + "eval_LOCATION_recall": 0.925531914893617, + "eval_ORGANIZATION_f1": 0.9333333333333335, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9447852760736196, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.966789667896679, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9776119402985075, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.09554101526737213, + "eval_overall_accuracy": 0.9870165745856354, + "eval_overall_f1": 0.9382093316519546, + "eval_overall_precision": 0.9417721518987342, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.6163, + "eval_samples_per_second": 275.837, + "eval_steps_per_second": 4.868, + "step": 3168 + }, + { + "epoch": 34.0, + "grad_norm": 0.0025567917618900537, + "learning_rate": 3.3e-05, + "loss": 0.0027, + "step": 3264 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.9238578680203046, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.883495145631068, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9393939393939393, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9779411764705882, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9851851851851852, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.10889267921447754, + "eval_overall_accuracy": 0.9867403314917127, + "eval_overall_f1": 0.9486858573216519, + "eval_overall_precision": 0.9451371571072319, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.6143, + "eval_samples_per_second": 276.722, + "eval_steps_per_second": 4.883, + "step": 3264 + }, + { + "epoch": 35.0, + "grad_norm": 0.003604689845815301, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0024, + "step": 3360 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.9137055837563451, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8737864077669902, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9365558912386707, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9451219512195121, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9710144927536232, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9640287769784173, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.09201914817094803, + "eval_overall_accuracy": 0.9870165745856354, + "eval_overall_f1": 0.9427860696517413, + "eval_overall_precision": 0.9334975369458128, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.6128, + "eval_samples_per_second": 277.43, + "eval_steps_per_second": 4.896, + "step": 3360 + }, + { + "epoch": 36.0, + "grad_norm": 0.009372674860060215, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0022, + "step": 3456 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.9430051813471503, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9418960244648319, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9625, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9672727272727273, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9637681159420289, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.07915590703487396, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.950943396226415, + "eval_overall_precision": 0.9521410579345088, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.6137, + "eval_samples_per_second": 277.022, + "eval_steps_per_second": 4.889, + "step": 3456 + }, + { + "epoch": 37.0, + "grad_norm": 0.008921943604946136, + "learning_rate": 3.15e-05, + "loss": 0.0013, + "step": 3552 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.9230769230769231, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8910891089108911, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9393939393939393, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.981549815498155, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925373134328358, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.09620564430952072, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.949748743718593, + "eval_overall_precision": 0.949748743718593, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.6579, + "eval_samples_per_second": 258.391, + "eval_steps_per_second": 4.56, + "step": 3552 + }, + { + "epoch": 38.0, + "grad_norm": 0.0019985612016171217, + "learning_rate": 3.1e-05, + "loss": 0.0028, + "step": 3648 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.9326424870466321, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9090909090909091, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.923076923076923, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9122807017543859, + "eval_ORGANIZATION_recall": 0.9341317365269461, + "eval_PERSON_f1": 0.9703703703703703, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9849624060150376, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.0810953676700592, + "eval_overall_accuracy": 0.9875690607734806, + "eval_overall_f1": 0.9413233458177278, + "eval_overall_precision": 0.9354838709677419, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.6203, + "eval_samples_per_second": 274.044, + "eval_steps_per_second": 4.836, + "step": 3648 + }, + { + "epoch": 39.0, + "grad_norm": 0.0019018716411665082, + "learning_rate": 3.05e-05, + "loss": 0.0022, + "step": 3744 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.9015544041450778, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8787878787878788, + "eval_LOCATION_recall": 0.925531914893617, + "eval_ORGANIZATION_f1": 0.9161676646706587, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9161676646706587, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9703703703703703, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9849624060150376, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.1018829345703125, + "eval_overall_accuracy": 0.9859116022099448, + "eval_overall_f1": 0.9309912170639899, + "eval_overall_precision": 0.9298245614035088, + "eval_overall_recall": 0.9321608040201005, + "eval_runtime": 0.6176, + "eval_samples_per_second": 275.269, + "eval_steps_per_second": 4.858, + "step": 3744 + }, + { + "epoch": 40.0, + "grad_norm": 0.06729024648666382, + "learning_rate": 3e-05, + "loss": 0.0012, + "step": 3840 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.9119170984455959, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.9361702127659575, + "eval_ORGANIZATION_f1": 0.9181286549707602, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.8971428571428571, + "eval_ORGANIZATION_recall": 0.9401197604790419, + "eval_PERSON_f1": 0.9777777777777779, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9924812030075187, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.096034474670887, + "eval_overall_accuracy": 0.9867403314917127, + "eval_overall_f1": 0.9366459627329192, + "eval_overall_precision": 0.9262899262899262, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.612, + "eval_samples_per_second": 277.787, + "eval_steps_per_second": 4.902, + "step": 3840 + }, + { + "epoch": 41.0, + "grad_norm": 0.0005613254033960402, + "learning_rate": 2.95e-05, + "loss": 0.0008, + "step": 3936 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.9285714285714286, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8921568627450981, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9382716049382717, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9681528662420382, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9779411764705882, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9851851851851852, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.0963701382279396, + "eval_overall_accuracy": 0.988950276243094, + "eval_overall_f1": 0.9494949494949495, + "eval_overall_precision": 0.9543147208121827, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6226, + "eval_samples_per_second": 273.045, + "eval_steps_per_second": 4.818, + "step": 3936 + }, + { + "epoch": 42.0, + "grad_norm": 0.0030692138243466616, + "learning_rate": 2.9e-05, + "loss": 0.0015, + "step": 4032 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.9319371727748691, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9175257731958762, + "eval_LOCATION_recall": 0.9468085106382979, + "eval_ORGANIZATION_f1": 0.9074626865671641, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9047619047619048, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9708029197080292, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9708029197080292, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.07825793325901031, + "eval_overall_accuracy": 0.9881215469613259, + "eval_overall_f1": 0.9349999999999999, + "eval_overall_precision": 0.9303482587064676, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6264, + "eval_samples_per_second": 271.375, + "eval_steps_per_second": 4.789, + "step": 4032 + }, + { + "epoch": 43.0, + "grad_norm": 0.0004494467575568706, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.0019, + "step": 4128 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.934010152284264, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.9272727272727274, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9386503067484663, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.981549815498155, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925373134328358, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.07773718982934952, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.9473684210526316, + "eval_overall_precision": 0.945, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.6145, + "eval_samples_per_second": 276.668, + "eval_steps_per_second": 4.882, + "step": 4128 + }, + { + "epoch": 44.0, + "grad_norm": 0.0007905985112302005, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0011, + "step": 4224 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.9246231155778896, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8761904761904762, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.08259343355894089, + "eval_overall_accuracy": 0.9892265193370166, + "eval_overall_f1": 0.9486858573216519, + "eval_overall_precision": 0.9451371571072319, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.731, + "eval_samples_per_second": 232.542, + "eval_steps_per_second": 4.104, + "step": 4224 + }, + { + "epoch": 45.0, + "grad_norm": 0.00027426957967691123, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0007, + "step": 4320 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.9387755102040817, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9019607843137255, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.9259259259259259, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9554140127388535, + "eval_ORGANIZATION_recall": 0.8982035928143712, + "eval_PERSON_f1": 0.9705882352941176, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9777777777777777, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.07949012517929077, + "eval_overall_accuracy": 0.9867403314917127, + "eval_overall_f1": 0.9444444444444445, + "eval_overall_precision": 0.949238578680203, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6776, + "eval_samples_per_second": 250.88, + "eval_steps_per_second": 4.427, + "step": 4320 + }, + { + "epoch": 46.0, + "grad_norm": 6.9263811111450195, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.0025, + "step": 4416 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.9435897435897436, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9108910891089109, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.9144542772861356, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9011627906976745, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9739776951672863, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9924242424242424, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.08157689869403839, + "eval_overall_accuracy": 0.9878453038674033, + "eval_overall_f1": 0.9414694894146949, + "eval_overall_precision": 0.9333333333333333, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.6189, + "eval_samples_per_second": 274.66, + "eval_steps_per_second": 4.847, + "step": 4416 + }, + { + "epoch": 47.0, + "grad_norm": 0.001023626420646906, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0012, + "step": 4512 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.9253731343283582, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9893617021276596, + "eval_ORGANIZATION_f1": 0.9353846153846154, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9620253164556962, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9777777777777779, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9924812030075187, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.09457841515541077, + "eval_overall_accuracy": 0.9875690607734806, + "eval_overall_f1": 0.9472361809045227, + "eval_overall_precision": 0.9472361809045227, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.6136, + "eval_samples_per_second": 277.045, + "eval_steps_per_second": 4.889, + "step": 4512 + }, + { + "epoch": 48.0, + "grad_norm": 0.0005152701633051038, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.0016, + "step": 4608 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.9246231155778896, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8761904761904762, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.924924924924925, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.927710843373494, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9777777777777779, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9924812030075187, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.0960976779460907, + "eval_overall_accuracy": 0.987292817679558, + "eval_overall_f1": 0.9426433915211971, + "eval_overall_precision": 0.9356435643564357, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.6176, + "eval_samples_per_second": 275.273, + "eval_steps_per_second": 4.858, + "step": 4608 + }, + { + "epoch": 49.0, + "grad_norm": 0.0012280733790248632, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.001, + "step": 4704 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.9333333333333335, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.900990099009901, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9285714285714287, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9230769230769231, + "eval_ORGANIZATION_recall": 0.9341317365269461, + "eval_PERSON_f1": 0.981549815498155, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925373134328358, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.09950720518827438, + "eval_overall_accuracy": 0.9864640883977901, + "eval_overall_f1": 0.9476309226932669, + "eval_overall_precision": 0.9405940594059405, + "eval_overall_recall": 0.9547738693467337, + "eval_runtime": 0.6234, + "eval_samples_per_second": 272.715, + "eval_steps_per_second": 4.813, + "step": 4704 + }, + { + "epoch": 50.0, + "grad_norm": 0.00443949643522501, + "learning_rate": 2.5e-05, + "loss": 0.001, + "step": 4800 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.934010152284264, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.9259259259259259, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9554140127388535, + "eval_ORGANIZATION_recall": 0.8982035928143712, + "eval_PERSON_f1": 0.9703703703703703, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9849624060150376, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.0988643616437912, + "eval_overall_accuracy": 0.9870165745856354, + "eval_overall_f1": 0.943109987357775, + "eval_overall_precision": 0.9491094147582697, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.6234, + "eval_samples_per_second": 272.715, + "eval_steps_per_second": 4.813, + "step": 4800 + }, + { + "epoch": 51.0, + "grad_norm": 0.0002302059147041291, + "learning_rate": 2.45e-05, + "loss": 0.0004, + "step": 4896 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.9387755102040817, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9019607843137255, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.9151515151515152, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9263803680981595, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9743589743589743, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9779411764705882, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.11296043545007706, + "eval_overall_accuracy": 0.9870165745856354, + "eval_overall_f1": 0.9411764705882354, + "eval_overall_precision": 0.9376558603491272, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6148, + "eval_samples_per_second": 276.493, + "eval_steps_per_second": 4.879, + "step": 4896 + }, + { + "epoch": 52.0, + "grad_norm": 0.0004114691982977092, + "learning_rate": 2.4e-05, + "loss": 0.0007, + "step": 4992 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.9381443298969071, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.91, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9212121212121211, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9325153374233128, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9743589743589743, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9779411764705882, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.10785677284002304, + "eval_overall_accuracy": 0.9878453038674033, + "eval_overall_f1": 0.9435382685069008, + "eval_overall_precision": 0.9423558897243107, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6113, + "eval_samples_per_second": 278.074, + "eval_steps_per_second": 4.907, + "step": 4992 + }, + { + "epoch": 53.0, + "grad_norm": 0.0016004899516701698, + "learning_rate": 2.35e-05, + "loss": 0.0011, + "step": 5088 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.9479166666666666, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9285714285714286, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9235474006116209, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.94375, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9705882352941176, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9777777777777777, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10214179754257202, + "eval_overall_accuracy": 0.9878453038674033, + "eval_overall_f1": 0.9456384323640961, + "eval_overall_precision": 0.9516539440203562, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6154, + "eval_samples_per_second": 276.238, + "eval_steps_per_second": 4.875, + "step": 5088 + }, + { + "epoch": 54.0, + "grad_norm": 0.00200115074403584, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0009, + "step": 5184 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.8958333333333333, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.8775510204081632, + "eval_LOCATION_recall": 0.9148936170212766, + "eval_ORGANIZATION_f1": 0.8979591836734695, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.875, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9705882352941176, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9777777777777777, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.12426385283470154, + "eval_overall_accuracy": 0.9825966850828729, + "eval_overall_f1": 0.9219330855018587, + "eval_overall_precision": 0.9095354523227384, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.6204, + "eval_samples_per_second": 273.999, + "eval_steps_per_second": 4.835, + "step": 5184 + }, + { + "epoch": 55.0, + "grad_norm": 0.006822248920798302, + "learning_rate": 2.25e-05, + "loss": 0.0011, + "step": 5280 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.9381443298969071, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.91, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9161676646706587, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9161676646706587, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.08817728608846664, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.9411764705882354, + "eval_overall_precision": 0.9376558603491272, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6164, + "eval_samples_per_second": 275.795, + "eval_steps_per_second": 4.867, + "step": 5280 + }, + { + "epoch": 56.0, + "grad_norm": 0.002012253738939762, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0004, + "step": 5376 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.9479166666666666, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9285714285714286, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9235474006116209, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.94375, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.08795258402824402, + "eval_overall_accuracy": 0.9900552486187846, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6131, + "eval_samples_per_second": 277.291, + "eval_steps_per_second": 4.893, + "step": 5376 + }, + { + "epoch": 57.0, + "grad_norm": 0.001478194841183722, + "learning_rate": 2.15e-05, + "loss": 0.0006, + "step": 5472 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.9333333333333335, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.900990099009901, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.913946587537092, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9058823529411765, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10102926194667816, + "eval_overall_accuracy": 0.987292817679558, + "eval_overall_f1": 0.9389788293897883, + "eval_overall_precision": 0.9308641975308642, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.6159, + "eval_samples_per_second": 276.031, + "eval_steps_per_second": 4.871, + "step": 5472 + }, + { + "epoch": 58.0, + "grad_norm": 0.0009641240467317402, + "learning_rate": 2.1e-05, + "loss": 0.0006, + "step": 5568 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.9333333333333335, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.900990099009901, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9166666666666667, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9112426035502958, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9779411764705882, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9851851851851852, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.09803132712841034, + "eval_overall_accuracy": 0.9875690607734806, + "eval_overall_f1": 0.9414694894146949, + "eval_overall_precision": 0.9333333333333333, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.7109, + "eval_samples_per_second": 239.12, + "eval_steps_per_second": 4.22, + "step": 5568 + }, + { + "epoch": 59.0, + "grad_norm": 0.00035870648571290076, + "learning_rate": 2.05e-05, + "loss": 0.0003, + "step": 5664 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.9430051813471503, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9263803680981595, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.949685534591195, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9705882352941176, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9777777777777777, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.09932650625705719, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.9456384323640961, + "eval_overall_precision": 0.9516539440203562, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6278, + "eval_samples_per_second": 270.773, + "eval_steps_per_second": 4.778, + "step": 5664 + }, + { + "epoch": 60.0, + "grad_norm": 0.005340063478797674, + "learning_rate": 2e-05, + "loss": 0.0003, + "step": 5760 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.9278350515463918, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9216867469879517, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9272727272727272, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9779411764705882, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9851851851851852, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.09833351522684097, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.9423558897243106, + "eval_overall_precision": 0.94, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6201, + "eval_samples_per_second": 274.141, + "eval_steps_per_second": 4.838, + "step": 5760 + }, + { + "epoch": 61.0, + "grad_norm": 0.0003080039459746331, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.0003, + "step": 5856 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.9381443298969071, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.91, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.906906906906907, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9096385542168675, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.09371061623096466, + "eval_overall_accuracy": 0.9886740331491712, + "eval_overall_f1": 0.93734335839599, + "eval_overall_precision": 0.935, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6239, + "eval_samples_per_second": 272.497, + "eval_steps_per_second": 4.809, + "step": 5856 + }, + { + "epoch": 62.0, + "grad_norm": 0.0016579064540565014, + "learning_rate": 1.9e-05, + "loss": 0.0011, + "step": 5952 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.9238578680203046, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.883495145631068, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9263803680981595, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.949685534591195, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.981549815498155, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925373134328358, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.1108747124671936, + "eval_overall_accuracy": 0.9867403314917127, + "eval_overall_f1": 0.9445843828715367, + "eval_overall_precision": 0.946969696969697, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6246, + "eval_samples_per_second": 272.177, + "eval_steps_per_second": 4.803, + "step": 5952 + }, + { + "epoch": 63.0, + "grad_norm": 0.0014240954769775271, + "learning_rate": 1.85e-05, + "loss": 0.0009, + "step": 6048 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.9479166666666666, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9285714285714286, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9161676646706587, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9161676646706587, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9777777777777779, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9924812030075187, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.0866396427154541, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.9447236180904522, + "eval_overall_precision": 0.9447236180904522, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6176, + "eval_samples_per_second": 275.269, + "eval_steps_per_second": 4.858, + "step": 6048 + }, + { + "epoch": 64.0, + "grad_norm": 0.0007542133680544794, + "learning_rate": 1.8e-05, + "loss": 0.0004, + "step": 6144 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.9072164948453608, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.88, + "eval_LOCATION_recall": 0.9361702127659575, + "eval_ORGANIZATION_f1": 0.9221556886227545, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9221556886227545, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9777777777777779, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9924812030075187, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.1202077716588974, + "eval_overall_accuracy": 0.9848066298342542, + "eval_overall_f1": 0.93734335839599, + "eval_overall_precision": 0.935, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.628, + "eval_samples_per_second": 270.7, + "eval_steps_per_second": 4.777, + "step": 6144 + }, + { + "epoch": 65.0, + "grad_norm": 0.0002191825769841671, + "learning_rate": 1.75e-05, + "loss": 0.0023, + "step": 6240 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.9533678756476683, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9292929292929293, + "eval_LOCATION_recall": 0.9787234042553191, + "eval_ORGANIZATION_f1": 0.9189189189189191, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9216867469879518, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9852941176470589, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925925925925926, + "eval_PERSON_recall": 0.9781021897810219, + "eval_loss": 0.09679195284843445, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.9498746867167919, + "eval_overall_precision": 0.9475, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.6251, + "eval_samples_per_second": 271.962, + "eval_steps_per_second": 4.799, + "step": 6240 + }, + { + "epoch": 66.0, + "grad_norm": 0.0010986309498548508, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0017, + "step": 6336 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.9430051813471503, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9300911854103343, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.981549815498155, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925373134328358, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.10312038660049438, + "eval_overall_accuracy": 0.9875690607734806, + "eval_overall_f1": 0.9508196721311476, + "eval_overall_precision": 0.9544303797468354, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.7092, + "eval_samples_per_second": 239.702, + "eval_steps_per_second": 4.23, + "step": 6336 + }, + { + "epoch": 67.0, + "grad_norm": 0.0005643566255457699, + "learning_rate": 1.65e-05, + "loss": 0.0014, + "step": 6432 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.9326424870466321, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9090909090909091, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9263803680981595, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.949685534591195, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.981549815498155, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9925373134328358, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.10499503463506699, + "eval_overall_accuracy": 0.9881215469613259, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6298, + "eval_samples_per_second": 269.943, + "eval_steps_per_second": 4.764, + "step": 6432 + }, + { + "epoch": 68.0, + "grad_norm": 0.0005254722782410681, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0007, + "step": 6528 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.9430051813471503, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9259259259259259, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9554140127388535, + "eval_ORGANIZATION_recall": 0.8982035928143712, + "eval_PERSON_f1": 0.9851851851851852, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 1.0, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.10490843653678894, + "eval_overall_accuracy": 0.9892265193370166, + "eval_overall_f1": 0.9504447268106734, + "eval_overall_precision": 0.961439588688946, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.61, + "eval_samples_per_second": 278.701, + "eval_steps_per_second": 4.918, + "step": 6528 + }, + { + "epoch": 69.0, + "grad_norm": 0.018979080021381378, + "learning_rate": 1.55e-05, + "loss": 0.0005, + "step": 6624 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.9375000000000001, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9183673469387755, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9166666666666667, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9112426035502958, + "eval_ORGANIZATION_recall": 0.9221556886227545, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.09969516843557358, + "eval_overall_accuracy": 0.9859116022099448, + "eval_overall_f1": 0.9411764705882354, + "eval_overall_precision": 0.9376558603491272, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6163, + "eval_samples_per_second": 275.818, + "eval_steps_per_second": 4.867, + "step": 6624 + }, + { + "epoch": 70.0, + "grad_norm": 0.00027675795718096197, + "learning_rate": 1.5e-05, + "loss": 0.001, + "step": 6720 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.9375000000000001, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9183673469387755, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9198813056379822, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9117647058823529, + "eval_ORGANIZATION_recall": 0.9281437125748503, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.1053897961974144, + "eval_overall_accuracy": 0.9875690607734806, + "eval_overall_f1": 0.9425, + "eval_overall_precision": 0.9378109452736318, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.6397, + "eval_samples_per_second": 265.745, + "eval_steps_per_second": 4.69, + "step": 6720 + }, + { + "epoch": 71.0, + "grad_norm": 0.00024086404300760478, + "learning_rate": 1.45e-05, + "loss": 0.0005, + "step": 6816 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.9430051813471503, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9357798165137615, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.95625, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.09782951325178146, + "eval_overall_accuracy": 0.9900552486187846, + "eval_overall_f1": 0.9506953223767383, + "eval_overall_precision": 0.9567430025445293, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.6144, + "eval_samples_per_second": 276.707, + "eval_steps_per_second": 4.883, + "step": 6816 + }, + { + "epoch": 72.0, + "grad_norm": 0.005460801534354687, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0008, + "step": 6912 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.9368421052631578, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_recall": 0.9468085106382979, + "eval_ORGANIZATION_f1": 0.9272727272727274, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9386503067484663, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.0954757034778595, + "eval_overall_accuracy": 0.9886740331491712, + "eval_overall_f1": 0.9456384323640961, + "eval_overall_precision": 0.9516539440203562, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6066, + "eval_samples_per_second": 280.257, + "eval_steps_per_second": 4.946, + "step": 6912 + }, + { + "epoch": 73.0, + "grad_norm": 0.0004871623241342604, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0005, + "step": 7008 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9503105590062112, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10084227472543716, + "eval_overall_accuracy": 0.9892265193370166, + "eval_overall_f1": 0.949367088607595, + "eval_overall_precision": 0.9566326530612245, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6223, + "eval_samples_per_second": 273.189, + "eval_steps_per_second": 4.821, + "step": 7008 + }, + { + "epoch": 74.0, + "grad_norm": 0.00226827641017735, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0004, + "step": 7104 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.929663608562691, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.95, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.966789667896679, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9776119402985075, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.1032579094171524, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.9455006337135614, + "eval_overall_precision": 0.9539641943734015, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.657, + "eval_samples_per_second": 258.764, + "eval_steps_per_second": 4.566, + "step": 7104 + }, + { + "epoch": 75.0, + "grad_norm": 0.0031442521139979362, + "learning_rate": 1.25e-05, + "loss": 0.0005, + "step": 7200 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9320987654320988, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9617834394904459, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9703703703703703, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9849624060150376, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.11296577751636505, + "eval_overall_accuracy": 0.9886740331491712, + "eval_overall_f1": 0.9477707006369427, + "eval_overall_precision": 0.9612403100775194, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.6204, + "eval_samples_per_second": 274.019, + "eval_steps_per_second": 4.836, + "step": 7200 + }, + { + "epoch": 76.0, + "grad_norm": 0.001323927310295403, + "learning_rate": 1.2e-05, + "loss": 0.0007, + "step": 7296 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9244712990936556, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9329268292682927, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.966789667896679, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9776119402985075, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.1114824041724205, + "eval_overall_accuracy": 0.9883977900552486, + "eval_overall_f1": 0.9432534678436318, + "eval_overall_precision": 0.9468354430379747, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6209, + "eval_samples_per_second": 273.776, + "eval_steps_per_second": 4.831, + "step": 7296 + }, + { + "epoch": 77.0, + "grad_norm": 0.003065042197704315, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0006, + "step": 7392 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9259259259259259, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9554140127388535, + "eval_ORGANIZATION_recall": 0.8982035928143712, + "eval_PERSON_f1": 0.9779411764705882, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9851851851851852, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.112979955971241, + "eval_overall_accuracy": 0.988950276243094, + "eval_overall_f1": 0.9479034307496823, + "eval_overall_precision": 0.9588688946015425, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.6315, + "eval_samples_per_second": 269.218, + "eval_steps_per_second": 4.751, + "step": 7392 + }, + { + "epoch": 78.0, + "grad_norm": 7.085573196411133, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0005, + "step": 7488 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9141104294478527, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9371069182389937, + "eval_ORGANIZATION_recall": 0.8922155688622755, + "eval_PERSON_f1": 0.9632352941176471, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9703703703703703, + "eval_PERSON_recall": 0.9562043795620438, + "eval_loss": 0.11508027464151382, + "eval_overall_accuracy": 0.9875690607734806, + "eval_overall_f1": 0.9378960709759189, + "eval_overall_precision": 0.9462915601023018, + "eval_overall_recall": 0.9296482412060302, + "eval_runtime": 0.6139, + "eval_samples_per_second": 276.907, + "eval_steps_per_second": 4.887, + "step": 7488 + }, + { + "epoch": 79.0, + "grad_norm": 0.0023044480476528406, + "learning_rate": 1.05e-05, + "loss": 0.0003, + "step": 7584 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9440993788819876, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.11096746474504471, + "eval_overall_accuracy": 0.9886740331491712, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6643, + "eval_samples_per_second": 255.917, + "eval_steps_per_second": 4.516, + "step": 7584 + }, + { + "epoch": 80.0, + "grad_norm": 0.00011201861343579367, + "learning_rate": 1e-05, + "loss": 0.0002, + "step": 7680 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9440993788819876, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.11268793046474457, + "eval_overall_accuracy": 0.9886740331491712, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6122, + "eval_samples_per_second": 277.679, + "eval_steps_per_second": 4.9, + "step": 7680 + }, + { + "epoch": 81.0, + "grad_norm": 0.0002784592506941408, + "learning_rate": 9.5e-06, + "loss": 0.0003, + "step": 7776 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9440993788819876, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.11348237097263336, + "eval_overall_accuracy": 0.9886740331491712, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6095, + "eval_samples_per_second": 278.928, + "eval_steps_per_second": 4.922, + "step": 7776 + }, + { + "epoch": 82.0, + "grad_norm": 0.00022091029677540064, + "learning_rate": 9e-06, + "loss": 0.0003, + "step": 7872 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9440993788819876, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10818831622600555, + "eval_overall_accuracy": 0.9886740331491712, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6203, + "eval_samples_per_second": 274.071, + "eval_steps_per_second": 4.837, + "step": 7872 + }, + { + "epoch": 83.0, + "grad_norm": 0.00032480747904628515, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0006, + "step": 7968 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.9430051813471503, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.937888198757764, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9741935483870968, + "eval_ORGANIZATION_recall": 0.9041916167664671, + "eval_PERSON_f1": 0.9779411764705882, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9851851851851852, + "eval_PERSON_recall": 0.9708029197080292, + "eval_loss": 0.11552965641021729, + "eval_overall_accuracy": 0.9895027624309393, + "eval_overall_f1": 0.9529860228716646, + "eval_overall_precision": 0.9640102827763496, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.623, + "eval_samples_per_second": 272.886, + "eval_steps_per_second": 4.816, + "step": 7968 + }, + { + "epoch": 84.0, + "grad_norm": 0.0005326655227690935, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0003, + "step": 8064 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.9479166666666666, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9285714285714286, + "eval_LOCATION_recall": 0.9680851063829787, + "eval_ORGANIZATION_f1": 0.9386503067484663, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9622641509433962, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10718917846679688, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.9531051964512041, + "eval_overall_precision": 0.9616368286445013, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.616, + "eval_samples_per_second": 275.964, + "eval_steps_per_second": 4.87, + "step": 8064 + }, + { + "epoch": 85.0, + "grad_norm": 0.0004018662730231881, + "learning_rate": 7.5e-06, + "loss": 0.0007, + "step": 8160 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9272727272727274, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9386503067484663, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10133817046880722, + "eval_overall_accuracy": 0.9895027624309393, + "eval_overall_f1": 0.946969696969697, + "eval_overall_precision": 0.9517766497461929, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6043, + "eval_samples_per_second": 281.311, + "eval_steps_per_second": 4.964, + "step": 8160 + }, + { + "epoch": 86.0, + "grad_norm": 0.002058778889477253, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0003, + "step": 8256 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9300911854103343, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10222224146127701, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.9481668773704173, + "eval_overall_precision": 0.9541984732824428, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6385, + "eval_samples_per_second": 266.268, + "eval_steps_per_second": 4.699, + "step": 8256 + }, + { + "epoch": 87.0, + "grad_norm": 0.00029380357591435313, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0002, + "step": 8352 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9300911854103343, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10264620184898376, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.9481668773704173, + "eval_overall_precision": 0.9541984732824428, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6718, + "eval_samples_per_second": 253.056, + "eval_steps_per_second": 4.466, + "step": 8352 + }, + { + "epoch": 88.0, + "grad_norm": 0.00025997136253863573, + "learning_rate": 6e-06, + "loss": 0.0002, + "step": 8448 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9300911854103343, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10285894572734833, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.9481668773704173, + "eval_overall_precision": 0.9541984732824428, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6216, + "eval_samples_per_second": 273.509, + "eval_steps_per_second": 4.827, + "step": 8448 + }, + { + "epoch": 89.0, + "grad_norm": 0.19176463782787323, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0003, + "step": 8544 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9300911854103343, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.1034075915813446, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.9481668773704173, + "eval_overall_precision": 0.9541984732824428, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6284, + "eval_samples_per_second": 270.534, + "eval_steps_per_second": 4.774, + "step": 8544 + }, + { + "epoch": 90.0, + "grad_norm": 9.726906137075275e-05, + "learning_rate": 5e-06, + "loss": 0.0003, + "step": 8640 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9357798165137615, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.95625, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10428432375192642, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.9505703422053232, + "eval_overall_precision": 0.959079283887468, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6211, + "eval_samples_per_second": 273.713, + "eval_steps_per_second": 4.83, + "step": 8640 + }, + { + "epoch": 91.0, + "grad_norm": 0.0001318985887337476, + "learning_rate": 4.5e-06, + "loss": 0.0002, + "step": 8736 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.929663608562691, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.95, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10446962714195251, + "eval_overall_accuracy": 0.9892265193370166, + "eval_overall_f1": 0.9480354879594423, + "eval_overall_precision": 0.9565217391304348, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6186, + "eval_samples_per_second": 274.801, + "eval_steps_per_second": 4.849, + "step": 8736 + }, + { + "epoch": 92.0, + "grad_norm": 0.0003957097651436925, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0002, + "step": 8832 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9503105590062112, + "eval_ORGANIZATION_recall": 0.9161676646706587, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10467536002397537, + "eval_overall_accuracy": 0.9895027624309393, + "eval_overall_f1": 0.949367088607595, + "eval_overall_precision": 0.9566326530612245, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.6214, + "eval_samples_per_second": 273.573, + "eval_steps_per_second": 4.828, + "step": 8832 + }, + { + "epoch": 93.0, + "grad_norm": 0.00048287183744832873, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0003, + "step": 8928 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.929663608562691, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.95, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10538303107023239, + "eval_overall_accuracy": 0.9897790055248619, + "eval_overall_f1": 0.9480354879594423, + "eval_overall_precision": 0.9565217391304348, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6144, + "eval_samples_per_second": 276.699, + "eval_steps_per_second": 4.883, + "step": 8928 + }, + { + "epoch": 94.0, + "grad_norm": 0.00010792938701342791, + "learning_rate": 3e-06, + "loss": 0.0009, + "step": 9024 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9325153374233129, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9559748427672956, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10885108262300491, + "eval_overall_accuracy": 0.9895027624309393, + "eval_overall_f1": 0.9492385786802031, + "eval_overall_precision": 0.958974358974359, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6137, + "eval_samples_per_second": 277.001, + "eval_steps_per_second": 4.888, + "step": 9024 + }, + { + "epoch": 95.0, + "grad_norm": 0.00041213424992747605, + "learning_rate": 2.5e-06, + "loss": 0.0004, + "step": 9120 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9440993788819876, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10332932323217392, + "eval_overall_accuracy": 0.9895027624309393, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6203, + "eval_samples_per_second": 274.044, + "eval_steps_per_second": 4.836, + "step": 9120 + }, + { + "epoch": 96.0, + "grad_norm": 0.0002837861829902977, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0002, + "step": 9216 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9440993788819876, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10279857367277145, + "eval_overall_accuracy": 0.9895027624309393, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6235, + "eval_samples_per_second": 272.654, + "eval_steps_per_second": 4.812, + "step": 9216 + }, + { + "epoch": 97.0, + "grad_norm": 0.0002584067406132817, + "learning_rate": 1.5e-06, + "loss": 0.0002, + "step": 9312 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.9440993788819876, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10300398617982864, + "eval_overall_accuracy": 0.9895027624309393, + "eval_overall_f1": 0.9468354430379746, + "eval_overall_precision": 0.9540816326530612, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6161, + "eval_samples_per_second": 275.938, + "eval_steps_per_second": 4.869, + "step": 9312 + }, + { + "epoch": 98.0, + "grad_norm": 0.00020491515169851482, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0003, + "step": 9408 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.929663608562691, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.95, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10345587879419327, + "eval_overall_accuracy": 0.9892265193370166, + "eval_overall_f1": 0.9480354879594423, + "eval_overall_precision": 0.9565217391304348, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6258, + "eval_samples_per_second": 271.659, + "eval_steps_per_second": 4.794, + "step": 9408 + }, + { + "epoch": 99.0, + "grad_norm": 0.0002451244508847594, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0002, + "step": 9504 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.929663608562691, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.95, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10355650633573532, + "eval_overall_accuracy": 0.9892265193370166, + "eval_overall_f1": 0.9480354879594423, + "eval_overall_precision": 0.9565217391304348, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6161, + "eval_samples_per_second": 275.943, + "eval_steps_per_second": 4.87, + "step": 9504 + }, + { + "epoch": 100.0, + "grad_norm": 0.0001467197434976697, + "learning_rate": 0.0, + "loss": 0.0002, + "step": 9600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 94, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.9574468085106383, + "eval_ORGANIZATION_f1": 0.929663608562691, + "eval_ORGANIZATION_number": 167, + "eval_ORGANIZATION_precision": 0.95, + "eval_ORGANIZATION_recall": 0.9101796407185628, + "eval_PERSON_f1": 0.9741697416974171, + "eval_PERSON_number": 137, + "eval_PERSON_precision": 0.9850746268656716, + "eval_PERSON_recall": 0.9635036496350365, + "eval_loss": 0.10357167571783066, + "eval_overall_accuracy": 0.9892265193370166, + "eval_overall_f1": 0.9480354879594423, + "eval_overall_precision": 0.9565217391304348, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.6779, + "eval_samples_per_second": 250.774, + "eval_steps_per_second": 4.425, + "step": 9600 + }, + { + "epoch": 100.0, + "step": 9600, + "total_flos": 3851325939318660.0, + "train_loss": 0.005652450745304426, + "train_runtime": 2268.3568, + "train_samples_per_second": 67.45, + "train_steps_per_second": 4.232 + } + ], + "logging_steps": 500, + "max_steps": 9600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 3851325939318660.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}