{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.2767674922943115, "learning_rate": 4.9500000000000004e-05, "loss": 0.2555, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.8699551569506726, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8083333333333333, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8472622478386167, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8352272727272727, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9473684210526315, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9333333333333333, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.07092644274234772, "eval_overall_accuracy": 0.9773543220104943, "eval_overall_f1": 0.8851674641148325, "eval_overall_precision": 0.8584686774941995, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2654, "eval_samples_per_second": 640.439, "eval_steps_per_second": 11.302, "step": 96 }, { "epoch": 2.0, "grad_norm": 2.0877230167388916, "learning_rate": 4.9e-05, "loss": 0.0584, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.8622222222222223, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.7950819672131147, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8888888888888888, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8666666666666667, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.05913590267300606, "eval_overall_accuracy": 0.9795636564484949, "eval_overall_f1": 0.9082240762812873, "eval_overall_precision": 0.8778801843317973, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2967, "eval_samples_per_second": 572.874, "eval_steps_per_second": 10.11, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.5676932334899902, "learning_rate": 4.85e-05, "loss": 0.0322, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.9473684210526316, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9339622641509434, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9149560117302051, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9176470588235294, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.0347554013133049, "eval_overall_accuracy": 0.9897818282242474, "eval_overall_f1": 0.9421894218942188, "eval_overall_precision": 0.9387254901960784, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2742, "eval_samples_per_second": 620.093, "eval_steps_per_second": 10.943, "step": 288 }, { "epoch": 4.0, "grad_norm": 3.374647378921509, "learning_rate": 4.8e-05, "loss": 0.0184, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.9209302325581395, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8839285714285714, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.8908554572271385, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8988095238095238, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9731800766283524, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9769230769230769, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.049937356263399124, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9251533742331289, "eval_overall_precision": 0.9195121951219513, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2755, "eval_samples_per_second": 617.113, "eval_steps_per_second": 10.89, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.9367578625679016, "learning_rate": 4.75e-05, "loss": 0.0164, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.9357798165137615, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8869565217391304, "eval_LOCATION_recall": 0.9902912621359223, "eval_ORGANIZATION_f1": 0.9046153846153846, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9692307692307692, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9767441860465116, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.05169909819960594, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9339975093399752, "eval_overall_precision": 0.9422110552763819, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2716, "eval_samples_per_second": 625.866, "eval_steps_per_second": 11.045, "step": 480 }, { "epoch": 6.0, "grad_norm": 0.5434015393257141, "learning_rate": 4.7e-05, "loss": 0.0092, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.9268292682926829, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9313725490196079, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.9020771513353115, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9692307692307692, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9767441860465116, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.062066756188869476, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9301745635910224, "eval_overall_precision": 0.9395465994962217, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2815, "eval_samples_per_second": 603.931, "eval_steps_per_second": 10.658, "step": 576 }, { "epoch": 7.0, "grad_norm": 4.149354934692383, "learning_rate": 4.6500000000000005e-05, "loss": 0.0075, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.9056603773584906, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8807339449541285, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9164265129682997, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9034090909090909, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.9429657794676807, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9393939393939394, "eval_PERSON_recall": 0.9465648854961832, "eval_loss": 0.06980126351118088, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9221411192214112, "eval_overall_precision": 0.9088729016786571, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.27, "eval_samples_per_second": 629.649, "eval_steps_per_second": 11.111, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.03421870991587639, "learning_rate": 4.600000000000001e-05, "loss": 0.0052, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.9423076923076923, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9333333333333333, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9325513196480938, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9352941176470588, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.9615384615384615, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.052357617765665054, "eval_overall_accuracy": 0.9911626622479978, "eval_overall_f1": 0.9443757725587144, "eval_overall_precision": 0.9455445544554455, "eval_overall_recall": 0.9432098765432099, "eval_runtime": 0.281, "eval_samples_per_second": 604.976, "eval_steps_per_second": 10.676, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.015103827230632305, "learning_rate": 4.55e-05, "loss": 0.0049, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.9108910891089108, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9292929292929293, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8999999999999999, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9053254437869822, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9578544061302683, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.06362461298704147, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9215442092154421, "eval_overall_precision": 0.9296482412060302, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2692, "eval_samples_per_second": 631.538, "eval_steps_per_second": 11.145, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.2673749029636383, "learning_rate": 4.5e-05, "loss": 0.005, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.9272727272727274, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8717948717948718, "eval_LOCATION_recall": 0.9902912621359223, "eval_ORGANIZATION_f1": 0.9171597633136095, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9281437125748503, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9433962264150944, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9328358208955224, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.07184188067913055, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.928311057108141, "eval_overall_precision": 0.9138755980861244, "eval_overall_recall": 0.9432098765432099, "eval_runtime": 0.2853, "eval_samples_per_second": 595.828, "eval_steps_per_second": 10.515, "step": 960 }, { "epoch": 11.0, "grad_norm": 2.693599224090576, "learning_rate": 4.4500000000000004e-05, "loss": 0.0049, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.9411764705882353, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9504950495049505, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9289940828402367, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9401197604790419, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9578544061302683, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.05996091663837433, "eval_overall_accuracy": 0.9895056614194974, "eval_overall_f1": 0.9414694894146949, "eval_overall_precision": 0.949748743718593, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2718, "eval_samples_per_second": 625.42, "eval_steps_per_second": 11.037, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.018131649121642113, "learning_rate": 4.4000000000000006e-05, "loss": 0.0042, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.9439252336448598, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9099099099099099, "eval_LOCATION_recall": 0.9805825242718447, "eval_ORGANIZATION_f1": 0.934131736526946, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9570552147239264, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9731800766283524, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9769230769230769, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.06885772943496704, "eval_overall_accuracy": 0.9886771610052472, "eval_overall_f1": 0.9493201483312732, "eval_overall_precision": 0.9504950495049505, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2703, "eval_samples_per_second": 628.826, "eval_steps_per_second": 11.097, "step": 1152 }, { "epoch": 13.0, "grad_norm": 0.4223394989967346, "learning_rate": 4.35e-05, "loss": 0.0038, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.9326923076923078, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9238095238095239, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.934131736526946, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9570552147239264, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9655172413793103, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9692307692307692, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.07858328521251678, "eval_overall_accuracy": 0.9872963269814968, "eval_overall_f1": 0.9439601494396015, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2675, "eval_samples_per_second": 635.432, "eval_steps_per_second": 11.214, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.4354705810546875, "learning_rate": 4.3e-05, "loss": 0.0035, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.9339622641509433, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.908256880733945, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9373134328358208, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9573170731707317, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9505703422053233, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.946969696969697, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.07810991257429123, "eval_overall_accuracy": 0.9892294946147473, "eval_overall_f1": 0.9407407407407408, "eval_overall_precision": 0.9407407407407408, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.272, "eval_samples_per_second": 625.044, "eval_steps_per_second": 11.03, "step": 1344 }, { "epoch": 15.0, "grad_norm": 2.229221820831299, "learning_rate": 4.25e-05, "loss": 0.0047, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.927536231884058, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.923076923076923, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9341317365269461, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9578544061302683, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.07066675275564194, "eval_overall_accuracy": 0.9884009942004971, "eval_overall_f1": 0.9354838709677419, "eval_overall_precision": 0.940149625935162, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2659, "eval_samples_per_second": 639.287, "eval_steps_per_second": 11.282, "step": 1440 }, { "epoch": 16.0, "grad_norm": 0.05903751030564308, "learning_rate": 4.2e-05, "loss": 0.0047, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.9223744292237444, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8706896551724138, "eval_LOCATION_recall": 0.9805825242718447, "eval_ORGANIZATION_f1": 0.8957055214723926, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9419354838709677, "eval_ORGANIZATION_recall": 0.8538011695906432, "eval_PERSON_f1": 0.9465648854961832, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.9465648854961832, "eval_loss": 0.08514788746833801, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.919454770755886, "eval_overall_precision": 0.9228855721393034, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2744, "eval_samples_per_second": 619.455, "eval_steps_per_second": 10.932, "step": 1536 }, { "epoch": 17.0, "grad_norm": 1.7228397130966187, "learning_rate": 4.15e-05, "loss": 0.0024, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.9268292682926829, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9313725490196079, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.9298245614035088, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9298245614035088, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.9465648854961832, "eval_loss": 0.08027452230453491, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9379652605459058, "eval_overall_precision": 0.942643391521197, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2683, "eval_samples_per_second": 633.676, "eval_steps_per_second": 11.183, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.003778486279770732, "learning_rate": 4.1e-05, "loss": 0.005, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.926605504587156, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8782608695652174, "eval_LOCATION_recall": 0.9805825242718447, "eval_ORGANIZATION_f1": 0.9221556886227544, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9447852760736196, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9425287356321839, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9461538461538461, "eval_PERSON_recall": 0.9389312977099237, "eval_loss": 0.07022350281476974, "eval_overall_accuracy": 0.988124827395747, "eval_overall_f1": 0.929889298892989, "eval_overall_precision": 0.9264705882352942, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2713, "eval_samples_per_second": 626.622, "eval_steps_per_second": 11.058, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.005008399952203035, "learning_rate": 4.05e-05, "loss": 0.0025, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.9383886255924171, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9166666666666666, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9226190476190477, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9393939393939394, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9323308270676691, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9185185185185185, "eval_PERSON_recall": 0.9465648854961832, "eval_loss": 0.07091382890939713, "eval_overall_accuracy": 0.9875724937862469, "eval_overall_f1": 0.929889298892989, "eval_overall_precision": 0.9264705882352942, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2685, "eval_samples_per_second": 633.213, "eval_steps_per_second": 11.174, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.0055842651054263115, "learning_rate": 4e-05, "loss": 0.0028, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.9209302325581395, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8839285714285714, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9063444108761328, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9615384615384615, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.07000420242547989, "eval_overall_accuracy": 0.9875724937862469, "eval_overall_f1": 0.9280397022332506, "eval_overall_precision": 0.9326683291770573, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.274, "eval_samples_per_second": 620.462, "eval_steps_per_second": 10.949, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.013501259498298168, "learning_rate": 3.9500000000000005e-05, "loss": 0.0046, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.9320388349514563, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9320388349514563, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9216867469879517, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9503105590062112, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9696969696969696, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9624060150375939, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.08840980380773544, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.940149625935162, "eval_overall_precision": 0.9496221662468514, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2653, "eval_samples_per_second": 640.738, "eval_steps_per_second": 11.307, "step": 2016 }, { "epoch": 22.0, "grad_norm": 0.06725859642028809, "learning_rate": 3.9000000000000006e-05, "loss": 0.0032, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.9359605911330049, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.95, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.9387755102040817, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.936046511627907, "eval_ORGANIZATION_recall": 0.9415204678362573, "eval_PERSON_f1": 0.9469696969696969, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9398496240601504, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.0810837596654892, "eval_overall_accuracy": 0.9884009942004971, "eval_overall_f1": 0.9407407407407408, "eval_overall_precision": 0.9407407407407408, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2668, "eval_samples_per_second": 637.067, "eval_steps_per_second": 11.242, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.009139477275311947, "learning_rate": 3.85e-05, "loss": 0.0021, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.9339622641509433, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.908256880733945, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.918918918918919, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08566579967737198, "eval_overall_accuracy": 0.9875724937862469, "eval_overall_f1": 0.9369592088998763, "eval_overall_precision": 0.9381188118811881, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2674, "eval_samples_per_second": 635.666, "eval_steps_per_second": 11.218, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.39542409777641296, "learning_rate": 3.8e-05, "loss": 0.0015, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.9252336448598131, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8918918918918919, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9179331306990881, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9556962025316456, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9509433962264152, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9402985074626866, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08336959779262543, "eval_overall_accuracy": 0.9872963269814968, "eval_overall_f1": 0.9306930693069307, "eval_overall_precision": 0.9330024813895782, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2674, "eval_samples_per_second": 635.741, "eval_steps_per_second": 11.219, "step": 2304 }, { "epoch": 25.0, "grad_norm": 0.6857410669326782, "learning_rate": 3.7500000000000003e-05, "loss": 0.0024, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.91324200913242, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8620689655172413, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9015873015873015, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9861111111111112, "eval_ORGANIZATION_recall": 0.8304093567251462, "eval_PERSON_f1": 0.9696969696969696, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9624060150375939, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.1184476986527443, "eval_overall_accuracy": 0.9831538249102458, "eval_overall_f1": 0.9273182957393483, "eval_overall_precision": 0.9414758269720102, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2697, "eval_samples_per_second": 630.231, "eval_steps_per_second": 11.122, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.005797994323074818, "learning_rate": 3.7e-05, "loss": 0.0028, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.9259259259259259, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8849557522123894, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9085365853658538, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9490445859872612, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.951310861423221, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9338235294117647, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0842943862080574, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9272503082614056, "eval_overall_precision": 0.9261083743842364, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2786, "eval_samples_per_second": 610.296, "eval_steps_per_second": 10.77, "step": 2496 }, { "epoch": 27.0, "grad_norm": 0.19068512320518494, "learning_rate": 3.65e-05, "loss": 0.0026, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9135802469135803, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9673202614379085, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9363295880149813, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9191176470588235, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.09652017802000046, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9228855721393036, "eval_overall_precision": 0.9298245614035088, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2668, "eval_samples_per_second": 637.133, "eval_steps_per_second": 11.244, "step": 2592 }, { "epoch": 28.0, "grad_norm": 0.12579739093780518, "learning_rate": 3.6e-05, "loss": 0.003, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.9215686274509804, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9306930693069307, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.9277108433734939, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9565217391304348, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.09345203638076782, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.935, "eval_overall_precision": 0.9468354430379747, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2711, "eval_samples_per_second": 627.023, "eval_steps_per_second": 11.065, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.267172247171402, "learning_rate": 3.55e-05, "loss": 0.0039, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.8985507246376813, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8942307692307693, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.9285714285714285, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9454545454545454, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9769230769230769, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9844961240310077, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.07543817162513733, "eval_overall_accuracy": 0.987848660590997, "eval_overall_f1": 0.9364881693648817, "eval_overall_precision": 0.9447236180904522, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2682, "eval_samples_per_second": 633.755, "eval_steps_per_second": 11.184, "step": 2784 }, { "epoch": 30.0, "grad_norm": 0.084771528840065, "learning_rate": 3.5e-05, "loss": 0.0035, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.9383886255924171, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9166666666666666, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9129129129129129, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9382716049382716, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08519454300403595, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9333333333333333, "eval_overall_precision": 0.9333333333333333, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2706, "eval_samples_per_second": 628.143, "eval_steps_per_second": 11.085, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.16097508370876312, "learning_rate": 3.45e-05, "loss": 0.0025, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.91324200913242, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8620689655172413, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9125, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9798657718120806, "eval_ORGANIZATION_recall": 0.8538011695906432, "eval_PERSON_f1": 0.9660377358490565, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9552238805970149, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.11531849950551987, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9303482587064676, "eval_overall_precision": 0.9373433583959899, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2669, "eval_samples_per_second": 636.961, "eval_steps_per_second": 11.24, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.19988451898097992, "learning_rate": 3.4000000000000007e-05, "loss": 0.0025, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.9186602870813397, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9056603773584906, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9074626865671643, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.926829268292683, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9588014981273407, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9411764705882353, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.08991865068674088, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9272503082614056, "eval_overall_precision": 0.9261083743842364, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2714, "eval_samples_per_second": 626.454, "eval_steps_per_second": 11.055, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.012818277813494205, "learning_rate": 3.35e-05, "loss": 0.002, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.8981481481481481, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8584070796460177, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8936170212765956, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.930379746835443, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.09652034193277359, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9171817058096416, "eval_overall_precision": 0.9183168316831684, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2691, "eval_samples_per_second": 631.621, "eval_steps_per_second": 11.146, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.4292362630367279, "learning_rate": 3.3e-05, "loss": 0.002, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.9124423963133641, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.868421052631579, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9130434782608696, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9735099337748344, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0980546846985817, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9313358302122348, "eval_overall_precision": 0.9419191919191919, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2817, "eval_samples_per_second": 603.485, "eval_steps_per_second": 10.65, "step": 3264 }, { "epoch": 35.0, "grad_norm": 0.0029306651558727026, "learning_rate": 3.2500000000000004e-05, "loss": 0.0013, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9186602870813397, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9056603773584906, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.913946587537092, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.927710843373494, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9655172413793103, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9692307692307692, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08090686798095703, "eval_overall_accuracy": 0.9895056614194974, "eval_overall_f1": 0.9318463444857497, "eval_overall_precision": 0.9353233830845771, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2665, "eval_samples_per_second": 637.822, "eval_steps_per_second": 11.256, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.022452721372246742, "learning_rate": 3.2000000000000005e-05, "loss": 0.0007, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.9326923076923078, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9238095238095239, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9253731343283581, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9451219512195121, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08816061168909073, "eval_overall_accuracy": 0.9886771610052472, "eval_overall_f1": 0.9404466501240694, "eval_overall_precision": 0.9451371571072319, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2695, "eval_samples_per_second": 630.9, "eval_steps_per_second": 11.134, "step": 3456 }, { "epoch": 37.0, "grad_norm": 0.003817070508375764, "learning_rate": 3.15e-05, "loss": 0.0015, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.9238095238095239, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9065420560747663, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0853443518280983, "eval_overall_accuracy": 0.9889533278099972, "eval_overall_f1": 0.9353233830845771, "eval_overall_precision": 0.9423558897243107, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.267, "eval_samples_per_second": 636.66, "eval_steps_per_second": 11.235, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.0011372750159353018, "learning_rate": 3.1e-05, "loss": 0.0004, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.9377990430622011, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9245283018867925, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9258160237388725, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9397590361445783, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9692307692307692, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9767441860465116, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08106154203414917, "eval_overall_accuracy": 0.9895056614194974, "eval_overall_f1": 0.9429280397022332, "eval_overall_precision": 0.9476309226932669, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2743, "eval_samples_per_second": 619.841, "eval_steps_per_second": 10.938, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.0003786446468438953, "learning_rate": 3.05e-05, "loss": 0.0005, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.9289099526066351, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9074074074074074, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9202453987730062, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.967741935483871, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9655172413793103, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9692307692307692, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.10155560076236725, "eval_overall_accuracy": 0.988124827395747, "eval_overall_f1": 0.9373433583959899, "eval_overall_precision": 0.9516539440203562, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2684, "eval_samples_per_second": 633.282, "eval_steps_per_second": 11.176, "step": 3744 }, { "epoch": 40.0, "grad_norm": 2.577596664428711, "learning_rate": 3e-05, "loss": 0.0013, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9268292682926829, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9313725490196079, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.9235294117647059, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9289940828402367, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08484916388988495, "eval_overall_accuracy": 0.988124827395747, "eval_overall_f1": 0.9356435643564357, "eval_overall_precision": 0.9379652605459057, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2677, "eval_samples_per_second": 634.999, "eval_steps_per_second": 11.206, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.006187554448843002, "learning_rate": 2.95e-05, "loss": 0.0003, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9371980676328503, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9326923076923077, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9277108433734939, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9565217391304348, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.1043783500790596, "eval_overall_accuracy": 0.988124827395747, "eval_overall_f1": 0.9426433915211971, "eval_overall_precision": 0.9521410579345088, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2709, "eval_samples_per_second": 627.539, "eval_steps_per_second": 11.074, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.0018218038603663445, "learning_rate": 2.9e-05, "loss": 0.0007, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9326923076923078, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9238095238095239, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9305135951661632, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9625, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08745528012514114, "eval_overall_accuracy": 0.9884009942004971, "eval_overall_f1": 0.9438202247191012, "eval_overall_precision": 0.9545454545454546, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2667, "eval_samples_per_second": 637.368, "eval_steps_per_second": 11.248, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.0006399091216735542, "learning_rate": 2.8499999999999998e-05, "loss": 0.0009, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9468599033816425, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9423076923076923, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.934131736526946, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9570552147239264, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.09338463842868805, "eval_overall_accuracy": 0.9886771610052472, "eval_overall_f1": 0.9489414694894147, "eval_overall_precision": 0.957286432160804, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2693, "eval_samples_per_second": 631.267, "eval_steps_per_second": 11.14, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.000999737880192697, "learning_rate": 2.8000000000000003e-05, "loss": 0.0011, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.9423076923076923, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9333333333333333, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9305135951661632, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9625, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.1033521518111229, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9426433915211971, "eval_overall_precision": 0.9521410579345088, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2771, "eval_samples_per_second": 613.495, "eval_steps_per_second": 10.826, "step": 4224 }, { "epoch": 45.0, "grad_norm": 0.00742174219340086, "learning_rate": 2.7500000000000004e-05, "loss": 0.0023, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.9383886255924171, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9166666666666666, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9281437125748503, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.950920245398773, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9618320610687023, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9618320610687023, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.0915098711848259, "eval_overall_accuracy": 0.9875724937862469, "eval_overall_f1": 0.9417596034696406, "eval_overall_precision": 0.945273631840796, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2698, "eval_samples_per_second": 630.154, "eval_steps_per_second": 11.12, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.005702113267034292, "learning_rate": 2.7000000000000002e-05, "loss": 0.0008, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.9468599033816425, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9423076923076923, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9337349397590361, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9627329192546584, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.0958024337887764, "eval_overall_accuracy": 0.987848660590997, "eval_overall_f1": 0.945137157107232, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2878, "eval_samples_per_second": 590.593, "eval_steps_per_second": 10.422, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.002750276355072856, "learning_rate": 2.6500000000000004e-05, "loss": 0.0017, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9428571428571428, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9252336448598131, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9300911854103344, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9683544303797469, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.1029670387506485, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9426433915211971, "eval_overall_precision": 0.9521410579345088, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2716, "eval_samples_per_second": 625.888, "eval_steps_per_second": 11.045, "step": 4512 }, { "epoch": 48.0, "grad_norm": 0.0004681101127061993, "learning_rate": 2.6000000000000002e-05, "loss": 0.0011, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9305135951661632, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9625, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.0953538790345192, "eval_overall_accuracy": 0.987848660590997, "eval_overall_f1": 0.9452736318407959, "eval_overall_precision": 0.9523809523809523, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2758, "eval_samples_per_second": 616.375, "eval_steps_per_second": 10.877, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.00043562919017858803, "learning_rate": 2.5500000000000003e-05, "loss": 0.0004, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.9473684210526316, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9339622641509434, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9317507418397626, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9457831325301205, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9578544061302683, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.08848461508750916, "eval_overall_accuracy": 0.9884009942004971, "eval_overall_f1": 0.9442379182156135, "eval_overall_precision": 0.9477611940298507, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2719, "eval_samples_per_second": 625.237, "eval_steps_per_second": 11.034, "step": 4704 }, { "epoch": 50.0, "grad_norm": 0.00041732785757631063, "learning_rate": 2.5e-05, "loss": 0.0006, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.9377990430622011, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9245283018867925, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.934131736526946, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9570552147239264, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9541984732824428, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9541984732824428, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.10080371052026749, "eval_overall_accuracy": 0.9886771610052472, "eval_overall_f1": 0.9416149068322981, "eval_overall_precision": 0.9475, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2727, "eval_samples_per_second": 623.352, "eval_steps_per_second": 11.0, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.00032116463989950716, "learning_rate": 2.45e-05, "loss": 0.0006, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.9423076923076923, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9333333333333333, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.934131736526946, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9570552147239264, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9505703422053233, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.946969696969697, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.1017720028758049, "eval_overall_accuracy": 0.9892294946147473, "eval_overall_f1": 0.9416149068322981, "eval_overall_precision": 0.9475, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2742, "eval_samples_per_second": 619.963, "eval_steps_per_second": 10.941, "step": 4896 }, { "epoch": 52.0, "grad_norm": 0.00043605637620203197, "learning_rate": 2.4e-05, "loss": 0.0005, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9473684210526316, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9339622641509434, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9454545454545454, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9811320754716981, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.10258789360523224, "eval_overall_accuracy": 0.9884009942004971, "eval_overall_f1": 0.9501246882793016, "eval_overall_precision": 0.9596977329974811, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2698, "eval_samples_per_second": 630.084, "eval_steps_per_second": 11.119, "step": 4992 }, { "epoch": 53.0, "grad_norm": 0.000758039706852287, "learning_rate": 2.35e-05, "loss": 0.001, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.9428571428571428, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9252336448598131, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9277108433734939, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9565217391304348, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9469696969696969, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9398496240601504, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.11404971033334732, "eval_overall_accuracy": 0.988124827395747, "eval_overall_f1": 0.9379652605459058, "eval_overall_precision": 0.942643391521197, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2668, "eval_samples_per_second": 637.073, "eval_steps_per_second": 11.242, "step": 5088 }, { "epoch": 54.0, "grad_norm": 0.0017183785093948245, "learning_rate": 2.3000000000000003e-05, "loss": 0.0014, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.9473684210526316, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9339622641509434, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9373134328358208, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9573170731707317, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.951310861423221, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9338235294117647, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.09656347334384918, "eval_overall_accuracy": 0.987848660590997, "eval_overall_f1": 0.9445129469790382, "eval_overall_precision": 0.9433497536945813, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2732, "eval_samples_per_second": 622.264, "eval_steps_per_second": 10.981, "step": 5184 }, { "epoch": 55.0, "grad_norm": 0.0020402558147907257, "learning_rate": 2.25e-05, "loss": 0.0007, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9428571428571428, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9252336448598131, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9357798165137614, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9807692307692307, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.11855879426002502, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9450000000000001, "eval_overall_precision": 0.9569620253164557, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2662, "eval_samples_per_second": 638.676, "eval_steps_per_second": 11.271, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.0006124353385530412, "learning_rate": 2.2000000000000003e-05, "loss": 0.0017, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9528301886792453, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.926605504587156, "eval_LOCATION_recall": 0.9805825242718447, "eval_ORGANIZATION_f1": 0.942942942942943, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9691358024691358, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.0981336161494255, "eval_overall_accuracy": 0.9889533278099972, "eval_overall_f1": 0.9493201483312732, "eval_overall_precision": 0.9504950495049505, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2681, "eval_samples_per_second": 634.161, "eval_steps_per_second": 11.191, "step": 5376 }, { "epoch": 57.0, "grad_norm": 1.6941306591033936, "learning_rate": 2.15e-05, "loss": 0.0011, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9457831325301205, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9751552795031055, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9505703422053233, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.946969696969697, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.08642779290676117, "eval_overall_accuracy": 0.9897818282242474, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9573934837092731, "eval_overall_recall": 0.9432098765432099, "eval_runtime": 0.267, "eval_samples_per_second": 636.67, "eval_steps_per_second": 11.235, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.00047853440628387034, "learning_rate": 2.1e-05, "loss": 0.0004, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9345794392523364, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9009009009009009, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9197530864197531, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9738562091503268, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.1228100135922432, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9363295880149815, "eval_overall_precision": 0.946969696969697, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2712, "eval_samples_per_second": 626.746, "eval_steps_per_second": 11.06, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.0004937900230288506, "learning_rate": 2.05e-05, "loss": 0.0007, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9373134328358208, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9573170731707317, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08817074447870255, "eval_overall_accuracy": 0.9897818282242474, "eval_overall_f1": 0.948019801980198, "eval_overall_precision": 0.9503722084367245, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.273, "eval_samples_per_second": 622.655, "eval_steps_per_second": 10.988, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.030789600685238838, "learning_rate": 2e-05, "loss": 0.0011, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9253731343283581, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9451219512195121, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.07861620932817459, "eval_overall_accuracy": 0.9903341618337476, "eval_overall_f1": 0.9432098765432099, "eval_overall_precision": 0.9432098765432099, "eval_overall_recall": 0.9432098765432099, "eval_runtime": 0.2732, "eval_samples_per_second": 622.282, "eval_steps_per_second": 10.981, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.001790176029317081, "learning_rate": 1.9500000000000003e-05, "loss": 0.0021, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9432835820895521, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9634146341463414, "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.07614848017692566, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9517923362175525, "eval_overall_precision": 0.9529702970297029, "eval_overall_recall": 0.9506172839506173, "eval_runtime": 0.2696, "eval_samples_per_second": 630.447, "eval_steps_per_second": 11.126, "step": 5856 }, { "epoch": 62.0, "grad_norm": 0.07821348309516907, "learning_rate": 1.9e-05, "loss": 0.0009, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9457831325301205, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9751552795031055, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9509433962264152, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9402985074626866, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.07894781976938248, "eval_overall_accuracy": 0.9903341618337476, "eval_overall_f1": 0.949194547707559, "eval_overall_precision": 0.9527363184079602, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2792, "eval_samples_per_second": 608.862, "eval_steps_per_second": 10.745, "step": 5952 }, { "epoch": 63.0, "grad_norm": 0.012701842933893204, "learning_rate": 1.85e-05, "loss": 0.0008, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9515151515151515, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9874213836477987, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.07250575721263885, "eval_overall_accuracy": 0.9911626622479978, "eval_overall_f1": 0.955223880597015, "eval_overall_precision": 0.9624060150375939, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2686, "eval_samples_per_second": 632.8, "eval_steps_per_second": 11.167, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.000331512390403077, "learning_rate": 1.8e-05, "loss": 0.0006, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9401197604790419, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9631901840490797, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9660377358490565, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9552238805970149, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.07420158386230469, "eval_overall_accuracy": 0.9908864954432477, "eval_overall_f1": 0.9517923362175525, "eval_overall_precision": 0.9529702970297029, "eval_overall_recall": 0.9506172839506173, "eval_runtime": 0.2777, "eval_samples_per_second": 612.141, "eval_steps_per_second": 10.802, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.00041734057595022023, "learning_rate": 1.75e-05, "loss": 0.0002, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9401197604790419, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9631901840490797, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9660377358490565, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9552238805970149, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.0765494853258133, "eval_overall_accuracy": 0.9908864954432477, "eval_overall_f1": 0.9517923362175525, "eval_overall_precision": 0.9529702970297029, "eval_overall_recall": 0.9506172839506173, "eval_runtime": 0.2694, "eval_samples_per_second": 631.031, "eval_steps_per_second": 11.136, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.00017248648509848863, "learning_rate": 1.7000000000000003e-05, "loss": 0.0005, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9425981873111782, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.975, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.07683875411748886, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9573934837092731, "eval_overall_recall": 0.9432098765432099, "eval_runtime": 0.2728, "eval_samples_per_second": 623.056, "eval_steps_per_second": 10.995, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.00042237259913235903, "learning_rate": 1.65e-05, "loss": 0.0003, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9454545454545454, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9811320754716981, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08000968396663666, "eval_overall_accuracy": 0.9897818282242474, "eval_overall_f1": 0.9514321295143213, "eval_overall_precision": 0.9597989949748744, "eval_overall_recall": 0.9432098765432099, "eval_runtime": 0.267, "eval_samples_per_second": 636.632, "eval_steps_per_second": 11.235, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.0002339567436138168, "learning_rate": 1.6000000000000003e-05, "loss": 0.0002, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9454545454545454, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9811320754716981, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08180868625640869, "eval_overall_accuracy": 0.9900579950289975, "eval_overall_f1": 0.9527363184079602, "eval_overall_precision": 0.9598997493734336, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2841, "eval_samples_per_second": 598.326, "eval_steps_per_second": 10.559, "step": 6528 }, { "epoch": 69.0, "grad_norm": 0.00043880593148060143, "learning_rate": 1.55e-05, "loss": 0.0002, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9520958083832336, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9754601226993865, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08010463416576385, "eval_overall_accuracy": 0.9914388290527478, "eval_overall_f1": 0.9554455445544555, "eval_overall_precision": 0.9578163771712159, "eval_overall_recall": 0.9530864197530864, "eval_runtime": 0.2652, "eval_samples_per_second": 641.058, "eval_steps_per_second": 11.313, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.0002598071005195379, "learning_rate": 1.5e-05, "loss": 0.0003, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9425981873111782, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.975, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08131943643093109, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9515527950310559, "eval_overall_precision": 0.9575, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2795, "eval_samples_per_second": 608.206, "eval_steps_per_second": 10.733, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.1277095526456833, "learning_rate": 1.45e-05, "loss": 0.0002, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9526627218934911, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9640718562874252, "eval_ORGANIZATION_recall": 0.9415204678362573, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0809330865740776, "eval_overall_accuracy": 0.991991162662248, "eval_overall_f1": 0.9568434032059187, "eval_overall_precision": 0.9556650246305419, "eval_overall_recall": 0.9580246913580247, "eval_runtime": 0.2678, "eval_samples_per_second": 634.748, "eval_steps_per_second": 11.201, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.0002249796671094373, "learning_rate": 1.4000000000000001e-05, "loss": 0.0003, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9520958083832336, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9754601226993865, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08355282992124557, "eval_overall_accuracy": 0.9914388290527478, "eval_overall_f1": 0.9554455445544555, "eval_overall_precision": 0.9578163771712159, "eval_overall_recall": 0.9530864197530864, "eval_runtime": 0.2712, "eval_samples_per_second": 626.786, "eval_steps_per_second": 11.061, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.00020244171901140362, "learning_rate": 1.3500000000000001e-05, "loss": 0.0002, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9489489489489489, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9753086419753086, "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08445817977190018, "eval_overall_accuracy": 0.9911626622479978, "eval_overall_f1": 0.9541511771995044, "eval_overall_precision": 0.9577114427860697, "eval_overall_recall": 0.9506172839506173, "eval_runtime": 0.2679, "eval_samples_per_second": 634.645, "eval_steps_per_second": 11.2, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.00021373889467213303, "learning_rate": 1.3000000000000001e-05, "loss": 0.0002, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9523809523809523, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9696969696969697, "eval_ORGANIZATION_recall": 0.935672514619883, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08255745470523834, "eval_overall_accuracy": 0.9914388290527478, "eval_overall_f1": 0.9555555555555556, "eval_overall_precision": 0.9555555555555556, "eval_overall_recall": 0.9555555555555556, "eval_runtime": 0.2688, "eval_samples_per_second": 632.533, "eval_steps_per_second": 11.162, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.004408800974488258, "learning_rate": 1.25e-05, "loss": 0.0002, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9552238805970149, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.975609756097561, "eval_ORGANIZATION_recall": 0.935672514619883, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08836288750171661, "eval_overall_accuracy": 0.9914388290527478, "eval_overall_f1": 0.9555555555555556, "eval_overall_precision": 0.9555555555555556, "eval_overall_recall": 0.9555555555555556, "eval_runtime": 0.2641, "eval_samples_per_second": 643.631, "eval_steps_per_second": 11.358, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.0001928619312820956, "learning_rate": 1.2e-05, "loss": 0.0004, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9457831325301205, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9751552795031055, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0857018306851387, "eval_overall_accuracy": 0.9908864954432477, "eval_overall_f1": 0.9528535980148883, "eval_overall_precision": 0.9576059850374065, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2698, "eval_samples_per_second": 630.058, "eval_steps_per_second": 11.119, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.00010045908129541203, "learning_rate": 1.1500000000000002e-05, "loss": 0.0002, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9457831325301205, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9751552795031055, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08945944160223007, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9516728624535316, "eval_overall_precision": 0.9552238805970149, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2673, "eval_samples_per_second": 635.965, "eval_steps_per_second": 11.223, "step": 7392 }, { "epoch": 78.0, "grad_norm": 9.681603842182085e-05, "learning_rate": 1.1000000000000001e-05, "loss": 0.0002, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9478672985781991, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9259259259259259, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9309309309309309, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9567901234567902, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.09126634895801544, "eval_overall_accuracy": 0.9900579950289975, "eval_overall_f1": 0.9443757725587144, "eval_overall_precision": 0.9455445544554455, "eval_overall_recall": 0.9432098765432099, "eval_runtime": 0.2768, "eval_samples_per_second": 614.194, "eval_steps_per_second": 10.839, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.00010793562250910327, "learning_rate": 1.05e-05, "loss": 0.0002, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9520958083832336, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9754601226993865, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08136361837387085, "eval_overall_accuracy": 0.9917149958574979, "eval_overall_f1": 0.9566294919454771, "eval_overall_precision": 0.9601990049751243, "eval_overall_recall": 0.9530864197530864, "eval_runtime": 0.2653, "eval_samples_per_second": 640.863, "eval_steps_per_second": 11.309, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.00013283896259963512, "learning_rate": 1e-05, "loss": 0.0003, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9425981873111782, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.975, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0856899619102478, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9515527950310559, "eval_overall_precision": 0.9575, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2676, "eval_samples_per_second": 635.305, "eval_steps_per_second": 11.211, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.4648440480232239, "learning_rate": 9.5e-06, "loss": 0.0006, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9454545454545454, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9811320754716981, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08704782277345657, "eval_overall_accuracy": 0.9903341618337476, "eval_overall_f1": 0.9527363184079602, "eval_overall_precision": 0.9598997493734336, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.267, "eval_samples_per_second": 636.671, "eval_steps_per_second": 11.235, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.0003541087207850069, "learning_rate": 9e-06, "loss": 0.0002, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9454545454545454, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9811320754716981, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.09802655875682831, "eval_overall_accuracy": 0.9897818282242474, "eval_overall_f1": 0.9503722084367245, "eval_overall_precision": 0.9551122194513716, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2682, "eval_samples_per_second": 633.952, "eval_steps_per_second": 11.187, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.00011640191951300949, "learning_rate": 8.500000000000002e-06, "loss": 0.0003, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9515151515151515, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9874213836477987, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08858073502779007, "eval_overall_accuracy": 0.9914388290527478, "eval_overall_f1": 0.9576059850374065, "eval_overall_precision": 0.9672544080604534, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2738, "eval_samples_per_second": 620.881, "eval_steps_per_second": 10.957, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.0006709143053740263, "learning_rate": 8.000000000000001e-06, "loss": 0.0002, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9425981873111782, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.975, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08843818306922913, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9515527950310559, "eval_overall_precision": 0.9575, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2811, "eval_samples_per_second": 604.84, "eval_steps_per_second": 10.674, "step": 8064 }, { "epoch": 85.0, "grad_norm": 0.0007605086429975927, "learning_rate": 7.5e-06, "loss": 0.0002, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9425981873111782, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.975, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0878758504986763, "eval_overall_accuracy": 0.9903341618337476, "eval_overall_f1": 0.9503722084367245, "eval_overall_precision": 0.9551122194513716, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.27, "eval_samples_per_second": 629.591, "eval_steps_per_second": 11.11, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.0001787513610906899, "learning_rate": 7.000000000000001e-06, "loss": 0.0003, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08754145354032516, "eval_overall_accuracy": 0.9903341618337476, "eval_overall_f1": 0.9528535980148883, "eval_overall_precision": 0.9576059850374065, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.284, "eval_samples_per_second": 598.582, "eval_steps_per_second": 10.563, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.0001922638766700402, "learning_rate": 6.5000000000000004e-06, "loss": 0.0005, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08587876707315445, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9540372670807454, "eval_overall_precision": 0.96, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2729, "eval_samples_per_second": 622.901, "eval_steps_per_second": 10.992, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.0001244653103640303, "learning_rate": 6e-06, "loss": 0.0002, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08630730211734772, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9540372670807454, "eval_overall_precision": 0.96, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.272, "eval_samples_per_second": 624.983, "eval_steps_per_second": 11.029, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.00015338353114202619, "learning_rate": 5.500000000000001e-06, "loss": 0.0001, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08653977513313293, "eval_overall_accuracy": 0.9908864954432477, "eval_overall_f1": 0.955223880597015, "eval_overall_precision": 0.9624060150375939, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.271, "eval_samples_per_second": 627.204, "eval_steps_per_second": 11.068, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.00015154466382227838, "learning_rate": 5e-06, "loss": 0.0006, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.942942942942943, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9691358024691358, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08594457805156708, "eval_overall_accuracy": 0.9908864954432477, "eval_overall_f1": 0.9504950495049505, "eval_overall_precision": 0.9528535980148883, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2725, "eval_samples_per_second": 623.798, "eval_steps_per_second": 11.008, "step": 8640 }, { "epoch": 91.0, "grad_norm": 0.00021005785674788058, "learning_rate": 4.5e-06, "loss": 0.0004, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08699291199445724, "eval_overall_accuracy": 0.9903341618337476, "eval_overall_f1": 0.9528535980148883, "eval_overall_precision": 0.9576059850374065, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2865, "eval_samples_per_second": 593.295, "eval_steps_per_second": 10.47, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.00020234761177562177, "learning_rate": 4.000000000000001e-06, "loss": 0.0004, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0870344266295433, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9540372670807454, "eval_overall_precision": 0.96, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2667, "eval_samples_per_second": 637.421, "eval_steps_per_second": 11.249, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.00018614475266076624, "learning_rate": 3.5000000000000004e-06, "loss": 0.0001, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08727699518203735, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9540372670807454, "eval_overall_precision": 0.96, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2734, "eval_samples_per_second": 621.723, "eval_steps_per_second": 10.972, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.000258227635640651, "learning_rate": 3e-06, "loss": 0.0002, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9425981873111782, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.975, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08997541666030884, "eval_overall_accuracy": 0.9900579950289975, "eval_overall_f1": 0.949194547707559, "eval_overall_precision": 0.9527363184079602, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.272, "eval_samples_per_second": 624.978, "eval_steps_per_second": 11.029, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.0005536518874578178, "learning_rate": 2.5e-06, "loss": 0.0001, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9523809523809522, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9345794392523364, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9425981873111782, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.975, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08989305794239044, "eval_overall_accuracy": 0.9903341618337476, "eval_overall_f1": 0.9503722084367245, "eval_overall_precision": 0.9551122194513716, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2828, "eval_samples_per_second": 601.055, "eval_steps_per_second": 10.607, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.00011867189459735528, "learning_rate": 2.0000000000000003e-06, "loss": 0.0001, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.089784637093544, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9540372670807454, "eval_overall_precision": 0.96, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2697, "eval_samples_per_second": 630.298, "eval_steps_per_second": 11.123, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.0001135926358983852, "learning_rate": 1.5e-06, "loss": 0.0004, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08897440135478973, "eval_overall_accuracy": 0.9903341618337476, "eval_overall_f1": 0.9528535980148883, "eval_overall_precision": 0.9576059850374065, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2712, "eval_samples_per_second": 626.956, "eval_steps_per_second": 11.064, "step": 9312 }, { "epoch": 98.0, "grad_norm": 0.00014891372120473534, "learning_rate": 1.0000000000000002e-06, "loss": 0.0004, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08858802914619446, "eval_overall_accuracy": 0.9906103286384976, "eval_overall_f1": 0.9540372670807454, "eval_overall_precision": 0.96, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.268, "eval_samples_per_second": 634.223, "eval_steps_per_second": 11.192, "step": 9408 }, { "epoch": 99.0, "grad_norm": 0.00010668356844689697, "learning_rate": 5.000000000000001e-07, "loss": 0.0001, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.08721774816513062, "eval_overall_accuracy": 0.9908864954432477, "eval_overall_f1": 0.955223880597015, "eval_overall_precision": 0.9624060150375939, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.269, "eval_samples_per_second": 631.976, "eval_steps_per_second": 11.153, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.008727684617042542, "learning_rate": 0.0, "loss": 0.0002, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9569377990430622, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9433962264150944, "eval_LOCATION_recall": 0.970873786407767, "eval_ORGANIZATION_f1": 0.9486404833836857, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.98125, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0870044156908989, "eval_overall_accuracy": 0.9908864954432477, "eval_overall_f1": 0.955223880597015, "eval_overall_precision": 0.9624060150375939, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.2768, "eval_samples_per_second": 614.248, "eval_steps_per_second": 10.84, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3876556506498588.0, "train_loss": 0.005288698806446821, "train_runtime": 895.2571, "train_samples_per_second": 171.012, "train_steps_per_second": 10.723 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3876556506498588.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }