{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 7.524846076965332, "learning_rate": 4.9500000000000004e-05, "loss": 0.2442, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.8972972972972972, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8383838383838383, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.898936170212766, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8535353535353535, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05805998295545578, "eval_overall_accuracy": 0.9821862348178138, "eval_overall_f1": 0.9217603911980441, "eval_overall_precision": 0.8849765258215962, "eval_overall_recall": 0.9617346938775511, "eval_runtime": 0.2705, "eval_samples_per_second": 628.537, "eval_steps_per_second": 11.092, "step": 96 }, { "epoch": 2.0, "grad_norm": 2.2480790615081787, "learning_rate": 4.9e-05, "loss": 0.0581, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.8864864864864866, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8282828282828283, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9190751445086704, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9464285714285714, "eval_ORGANIZATION_recall": 0.8932584269662921, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05479570850729942, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9289340101522842, "eval_overall_precision": 0.9242424242424242, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2786, "eval_samples_per_second": 610.099, "eval_steps_per_second": 10.766, "step": 192 }, { "epoch": 3.0, "grad_norm": 0.7553579807281494, "learning_rate": 4.85e-05, "loss": 0.0357, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.9010989010989011, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8541666666666666, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9204545454545454, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9310344827586207, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05141273885965347, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2743, "eval_samples_per_second": 619.871, "eval_steps_per_second": 10.939, "step": 288 }, { "epoch": 4.0, "grad_norm": 2.5006167888641357, "learning_rate": 4.8e-05, "loss": 0.0251, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9209809264305177, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8941798941798942, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9641434262948206, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.983739837398374, "eval_PERSON_recall": 0.9453125, "eval_loss": 0.06069042533636093, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9331651954602775, "eval_overall_precision": 0.9226932668329177, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2776, "eval_samples_per_second": 612.329, "eval_steps_per_second": 10.806, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.45395660400390625, "learning_rate": 4.75e-05, "loss": 0.0146, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9333333333333335, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9230769230769231, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9644268774703557, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.976, "eval_PERSON_recall": 0.953125, "eval_loss": 0.06169137358665466, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9380530973451328, "eval_overall_precision": 0.9298245614035088, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2738, "eval_samples_per_second": 620.856, "eval_steps_per_second": 10.956, "step": 480 }, { "epoch": 6.0, "grad_norm": 0.7863659262657166, "learning_rate": 4.7e-05, "loss": 0.0117, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.888888888888889, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.851063829787234, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9166666666666666, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9065934065934066, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9603174603174603, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9758064516129032, "eval_PERSON_recall": 0.9453125, "eval_loss": 0.0706215351819992, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9242424242424242, "eval_overall_precision": 0.915, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.28, "eval_samples_per_second": 607.104, "eval_steps_per_second": 10.714, "step": 576 }, { "epoch": 7.0, "grad_norm": 0.042494997382164, "learning_rate": 4.6500000000000005e-05, "loss": 0.0083, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.8526315789473684, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7788461538461539, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9534883720930233, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9461538461538461, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.0925956517457962, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9142857142857143, "eval_overall_precision": 0.8910411622276029, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2741, "eval_samples_per_second": 620.296, "eval_steps_per_second": 10.946, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.1783752143383026, "learning_rate": 4.600000000000001e-05, "loss": 0.008, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.8999999999999999, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8617021276595744, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9371428571428573, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9534883720930233, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.07805542647838593, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9399744572158366, "eval_overall_precision": 0.9411764705882353, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2886, "eval_samples_per_second": 589.03, "eval_steps_per_second": 10.395, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.3219904899597168, "learning_rate": 4.55e-05, "loss": 0.0042, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8914285714285715, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8764044943820225, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9662921348314607, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9662921348314607, "eval_ORGANIZATION_recall": 0.9662921348314607, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06586796045303345, "eval_overall_accuracy": 0.9889338731443995, "eval_overall_f1": 0.9516539440203563, "eval_overall_precision": 0.949238578680203, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2757, "eval_samples_per_second": 616.613, "eval_steps_per_second": 10.881, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.016961606219410896, "learning_rate": 4.5e-05, "loss": 0.0044, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9441340782122906, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9388888888888889, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9494163424124514, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9457364341085271, "eval_PERSON_recall": 0.953125, "eval_loss": 0.07118100672960281, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9343434343434343, "eval_overall_precision": 0.925, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2777, "eval_samples_per_second": 612.13, "eval_steps_per_second": 10.802, "step": 960 }, { "epoch": 11.0, "grad_norm": 0.9713481664657593, "learning_rate": 4.4500000000000004e-05, "loss": 0.005, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.8972972972972972, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8383838383838383, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9438202247191011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9438202247191011, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.08548479527235031, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9408805031446542, "eval_overall_precision": 0.9280397022332506, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2746, "eval_samples_per_second": 619.083, "eval_steps_per_second": 10.925, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.007533730939030647, "learning_rate": 4.4000000000000006e-05, "loss": 0.0036, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.9050279329608939, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8709677419354839, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9408450704225352, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.943502824858757, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.08587783575057983, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9428208386277002, "eval_overall_precision": 0.9392405063291139, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2742, "eval_samples_per_second": 619.889, "eval_steps_per_second": 10.939, "step": 1152 }, { "epoch": 13.0, "grad_norm": 0.012886933982372284, "learning_rate": 4.35e-05, "loss": 0.0042, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9526462395543176, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9447513812154696, "eval_ORGANIZATION_recall": 0.9606741573033708, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.07611611485481262, "eval_overall_accuracy": 0.9889338731443995, "eval_overall_f1": 0.9505703422053231, "eval_overall_precision": 0.9445843828715366, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2752, "eval_samples_per_second": 617.726, "eval_steps_per_second": 10.901, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.020214928314089775, "learning_rate": 4.3e-05, "loss": 0.0036, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.9028571428571427, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8876404494382022, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9401709401709402, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.953757225433526, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08433674275875092, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9435897435897437, "eval_overall_precision": 0.9484536082474226, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2767, "eval_samples_per_second": 614.444, "eval_steps_per_second": 10.843, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.005007833708077669, "learning_rate": 4.25e-05, "loss": 0.0028, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.9111111111111112, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8723404255319149, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.934844192634561, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9428571428571428, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09056564420461655, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.94147582697201, "eval_overall_precision": 0.9390862944162437, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2739, "eval_samples_per_second": 620.731, "eval_steps_per_second": 10.954, "step": 1440 }, { "epoch": 16.0, "grad_norm": 0.002987402491271496, "learning_rate": 4.2e-05, "loss": 0.0017, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8950276243093923, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8526315789473684, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9394812680115273, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9644970414201184, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9606299212598425, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9682539682539683, "eval_PERSON_recall": 0.953125, "eval_loss": 0.09138376265764236, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9360613810741688, "eval_overall_precision": 0.9384615384615385, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2746, "eval_samples_per_second": 619.018, "eval_steps_per_second": 10.924, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.003726888680830598, "learning_rate": 4.15e-05, "loss": 0.002, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.8876404494382023, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8586956521739131, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9491525423728814, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.08281037956476212, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.94147582697201, "eval_overall_precision": 0.9390862944162437, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2742, "eval_samples_per_second": 619.907, "eval_steps_per_second": 10.94, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.003566289786249399, "learning_rate": 4.1e-05, "loss": 0.0033, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.9120879120879122, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8645833333333334, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.925207756232687, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.912568306010929, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.0640731155872345, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9385194479297364, "eval_overall_precision": 0.9234567901234568, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2758, "eval_samples_per_second": 616.283, "eval_steps_per_second": 10.876, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.016631081700325012, "learning_rate": 4.05e-05, "loss": 0.0024, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.8863636363636364, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8666666666666667, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9476584022038568, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9297297297297298, "eval_ORGANIZATION_recall": 0.9662921348314607, "eval_PERSON_f1": 0.9606299212598425, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9682539682539683, "eval_PERSON_recall": 0.953125, "eval_loss": 0.09817531704902649, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9382093316519546, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2771, "eval_samples_per_second": 613.515, "eval_steps_per_second": 10.827, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.35814717411994934, "learning_rate": 4e-05, "loss": 0.0037, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.8864864864864866, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8282828282828283, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.96045197740113, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9659090909090909, "eval_ORGANIZATION_recall": 0.9550561797752809, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.0904369205236435, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9469696969696969, "eval_overall_precision": 0.9375, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2871, "eval_samples_per_second": 592.205, "eval_steps_per_second": 10.451, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.010785204358398914, "learning_rate": 3.9500000000000005e-05, "loss": 0.0038, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9411764705882353, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9385474860335196, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9609375, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9609375, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.0786675289273262, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9444444444444445, "eval_overall_precision": 0.935, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2796, "eval_samples_per_second": 607.994, "eval_steps_per_second": 10.729, "step": 2016 }, { "epoch": 22.0, "grad_norm": 0.02177988551557064, "learning_rate": 3.9000000000000006e-05, "loss": 0.0024, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.8950276243093923, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8526315789473684, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9388888888888889, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9285714285714286, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9523809523809523, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.967741935483871, "eval_PERSON_recall": 0.9375, "eval_loss": 0.06972306966781616, "eval_overall_accuracy": 0.9889338731443995, "eval_overall_f1": 0.9331651954602775, "eval_overall_precision": 0.9226932668329177, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2769, "eval_samples_per_second": 613.91, "eval_steps_per_second": 10.834, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.1288156807422638, "learning_rate": 3.85e-05, "loss": 0.0041, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9467787114845938, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9441340782122905, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.0794149860739708, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9480354879594423, "eval_overall_precision": 0.9420654911838791, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2829, "eval_samples_per_second": 600.817, "eval_steps_per_second": 10.603, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.0011946976883336902, "learning_rate": 3.8e-05, "loss": 0.0033, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9333333333333335, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9230769230769231, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9603174603174603, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9758064516129032, "eval_PERSON_recall": 0.9453125, "eval_loss": 0.08298368752002716, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9390862944162437, "eval_overall_precision": 0.9343434343434344, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2768, "eval_samples_per_second": 614.247, "eval_steps_per_second": 10.84, "step": 2304 }, { "epoch": 25.0, "grad_norm": 0.008975312113761902, "learning_rate": 3.7500000000000003e-05, "loss": 0.0034, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.9060773480662985, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8631578947368421, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9526462395543176, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9447513812154696, "eval_ORGANIZATION_recall": 0.9606741573033708, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08038989454507828, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9496221662468515, "eval_overall_precision": 0.9378109452736318, "eval_overall_recall": 0.9617346938775511, "eval_runtime": 0.2782, "eval_samples_per_second": 611.154, "eval_steps_per_second": 10.785, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.003603309392929077, "learning_rate": 3.7e-05, "loss": 0.0012, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9444444444444444, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9340659340659341, "eval_ORGANIZATION_recall": 0.9550561797752809, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07283078134059906, "eval_overall_accuracy": 0.9902834008097166, "eval_overall_f1": 0.9506953223767383, "eval_overall_precision": 0.9423558897243107, "eval_overall_recall": 0.9591836734693877, "eval_runtime": 0.2753, "eval_samples_per_second": 617.432, "eval_steps_per_second": 10.896, "step": 2496 }, { "epoch": 27.0, "grad_norm": 0.0058512561954557896, "learning_rate": 3.65e-05, "loss": 0.0015, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9505494505494506, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9301075268817204, "eval_ORGANIZATION_recall": 0.9719101123595506, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09567292034626007, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9508196721311475, "eval_overall_precision": 0.940149625935162, "eval_overall_recall": 0.9617346938775511, "eval_runtime": 0.2824, "eval_samples_per_second": 601.88, "eval_steps_per_second": 10.621, "step": 2592 }, { "epoch": 28.0, "grad_norm": 1.171476125717163, "learning_rate": 3.6e-05, "loss": 0.0029, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9491525423728814, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07663165777921677, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9517766497461929, "eval_overall_precision": 0.946969696969697, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2755, "eval_samples_per_second": 617.074, "eval_steps_per_second": 10.89, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.40970727801322937, "learning_rate": 3.55e-05, "loss": 0.0031, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9130434782608695, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9455587392550143, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9649122807017544, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08022492378950119, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9467005076142132, "eval_overall_precision": 0.9419191919191919, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2752, "eval_samples_per_second": 617.806, "eval_steps_per_second": 10.902, "step": 2784 }, { "epoch": 30.0, "grad_norm": 0.08574865758419037, "learning_rate": 3.5e-05, "loss": 0.0018, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.9050279329608939, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8709677419354839, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9577464788732394, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.96045197740113, "eval_ORGANIZATION_recall": 0.9550561797752809, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08365346491336823, "eval_overall_accuracy": 0.9892037786774629, "eval_overall_f1": 0.9517766497461929, "eval_overall_precision": 0.946969696969697, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2756, "eval_samples_per_second": 616.825, "eval_steps_per_second": 10.885, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.002840681467205286, "learning_rate": 3.45e-05, "loss": 0.0017, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.961111111111111, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9505494505494505, "eval_ORGANIZATION_recall": 0.9719101123595506, "eval_PERSON_f1": 0.9606299212598425, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9682539682539683, "eval_PERSON_recall": 0.953125, "eval_loss": 0.07920122146606445, "eval_overall_accuracy": 0.9902834008097166, "eval_overall_f1": 0.9569620253164557, "eval_overall_precision": 0.949748743718593, "eval_overall_recall": 0.9642857142857143, "eval_runtime": 0.2801, "eval_samples_per_second": 606.828, "eval_steps_per_second": 10.709, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.001423178124241531, "learning_rate": 3.4000000000000007e-05, "loss": 0.0017, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.9171270718232045, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8736842105263158, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9633802816901408, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9661016949152542, "eval_ORGANIZATION_recall": 0.9606741573033708, "eval_PERSON_f1": 0.9644268774703557, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.976, "eval_PERSON_recall": 0.953125, "eval_loss": 0.06753446161746979, "eval_overall_accuracy": 0.99055330634278, "eval_overall_f1": 0.9531051964512041, "eval_overall_precision": 0.947103274559194, "eval_overall_recall": 0.9591836734693877, "eval_runtime": 0.2741, "eval_samples_per_second": 620.297, "eval_steps_per_second": 10.946, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.001040176604874432, "learning_rate": 3.35e-05, "loss": 0.0012, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9542857142857143, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9709302325581395, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09092291444540024, "eval_overall_accuracy": 0.9897435897435898, "eval_overall_f1": 0.9539641943734015, "eval_overall_precision": 0.9564102564102565, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2779, "eval_samples_per_second": 611.82, "eval_steps_per_second": 10.797, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.21219216287136078, "learning_rate": 3.3e-05, "loss": 0.002, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9287749287749287, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9421965317919075, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9565217391304348, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968, "eval_PERSON_recall": 0.9453125, "eval_loss": 0.10773035138845444, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9370988446726572, "eval_overall_precision": 0.9431524547803618, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2753, "eval_samples_per_second": 617.461, "eval_steps_per_second": 10.896, "step": 3264 }, { "epoch": 35.0, "grad_norm": 0.002467579208314419, "learning_rate": 3.2500000000000004e-05, "loss": 0.0023, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.95, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9395604395604396, "eval_ORGANIZATION_recall": 0.9606741573033708, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.091239333152771, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9494949494949495, "eval_overall_precision": 0.94, "eval_overall_recall": 0.9591836734693877, "eval_runtime": 0.2753, "eval_samples_per_second": 617.528, "eval_steps_per_second": 10.898, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.01504553947597742, "learning_rate": 3.2000000000000005e-05, "loss": 0.0016, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9545454545454545, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9655172413793104, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.08393337577581406, "eval_overall_accuracy": 0.9892037786774629, "eval_overall_f1": 0.9540816326530612, "eval_overall_precision": 0.9540816326530612, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.275, "eval_samples_per_second": 618.195, "eval_steps_per_second": 10.909, "step": 3456 }, { "epoch": 37.0, "grad_norm": 0.003291564527899027, "learning_rate": 3.15e-05, "loss": 0.0012, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9344729344729344, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9479768786127167, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9644268774703557, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.976, "eval_PERSON_recall": 0.953125, "eval_loss": 0.10695616900920868, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9399744572158366, "eval_overall_precision": 0.9411764705882353, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2752, "eval_samples_per_second": 617.842, "eval_steps_per_second": 10.903, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.0029779509641230106, "learning_rate": 3.1e-05, "loss": 0.0009, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.9392265193370165, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8947368421052632, "eval_LOCATION_recall": 0.9883720930232558, "eval_ORGANIZATION_f1": 0.9431818181818182, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9540229885057471, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.08564214408397675, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9516539440203563, "eval_overall_precision": 0.949238578680203, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2745, "eval_samples_per_second": 619.218, "eval_steps_per_second": 10.927, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.0008915510843507946, "learning_rate": 3.05e-05, "loss": 0.0006, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.9333333333333332, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8936170212765957, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9375000000000001, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9482758620689655, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09636305272579193, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9453621346886911, "eval_overall_precision": 0.9417721518987342, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2929, "eval_samples_per_second": 580.423, "eval_steps_per_second": 10.243, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.0008046123548410833, "learning_rate": 3e-05, "loss": 0.0011, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9464788732394366, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9491525423728814, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09920275211334229, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9491094147582698, "eval_overall_precision": 0.9467005076142132, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2781, "eval_samples_per_second": 611.234, "eval_steps_per_second": 10.786, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.000929164991248399, "learning_rate": 2.95e-05, "loss": 0.0009, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9385474860335195, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9032258064516129, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9435028248587571, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9488636363636364, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9644268774703557, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.976, "eval_PERSON_recall": 0.953125, "eval_loss": 0.1071603074669838, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9491094147582698, "eval_overall_precision": 0.9467005076142132, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2831, "eval_samples_per_second": 600.488, "eval_steps_per_second": 10.597, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.00026114823413081467, "learning_rate": 2.9e-05, "loss": 0.0007, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9333333333333332, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8936170212765957, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9458689458689458, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9595375722543352, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9682539682539683, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9838709677419355, "eval_PERSON_recall": 0.953125, "eval_loss": 0.1193474680185318, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9501915708812262, "eval_overall_precision": 0.9514066496163683, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2756, "eval_samples_per_second": 616.891, "eval_steps_per_second": 10.886, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.0013629100285470486, "learning_rate": 2.8499999999999998e-05, "loss": 0.0014, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9385474860335195, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9032258064516129, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9435028248587571, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9488636363636364, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9606299212598425, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9682539682539683, "eval_PERSON_recall": 0.953125, "eval_loss": 0.11290433257818222, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9479034307496824, "eval_overall_precision": 0.9443037974683545, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2779, "eval_samples_per_second": 611.663, "eval_steps_per_second": 10.794, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.0006920368759892881, "learning_rate": 2.8000000000000003e-05, "loss": 0.0007, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.9438202247191011, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9130434782608695, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9464788732394366, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9491525423728814, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9609375, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9609375, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.12893009185791016, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9505703422053231, "eval_overall_precision": 0.9445843828715366, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2752, "eval_samples_per_second": 617.77, "eval_steps_per_second": 10.902, "step": 4224 }, { "epoch": 45.0, "grad_norm": 0.0005794434691779315, "learning_rate": 2.7500000000000004e-05, "loss": 0.0006, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.9281767955801105, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8842105263157894, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9470752089136492, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9392265193370166, "eval_ORGANIZATION_recall": 0.9550561797752809, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.11674495786428452, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.949748743718593, "eval_overall_precision": 0.9356435643564357, "eval_overall_recall": 0.9642857142857143, "eval_runtime": 0.2809, "eval_samples_per_second": 605.289, "eval_steps_per_second": 10.682, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.004815615713596344, "learning_rate": 2.7000000000000002e-05, "loss": 0.0014, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.9120879120879122, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8645833333333334, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9461756373937678, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9542857142857143, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9682539682539683, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9838709677419355, "eval_PERSON_recall": 0.953125, "eval_loss": 0.11675991863012314, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9453621346886911, "eval_overall_precision": 0.9417721518987342, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2778, "eval_samples_per_second": 612.001, "eval_steps_per_second": 10.8, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.0005003380356356502, "learning_rate": 2.6500000000000004e-05, "loss": 0.0022, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9171270718232045, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8736842105263158, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9421965317919075, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9702380952380952, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.10903999209403992, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9475032010243277, "eval_overall_precision": 0.9511568123393316, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2786, "eval_samples_per_second": 610.178, "eval_steps_per_second": 10.768, "step": 4512 }, { "epoch": 48.0, "grad_norm": 34.43635559082031, "learning_rate": 2.6000000000000002e-05, "loss": 0.0033, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9385474860335196, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9333333333333333, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9603174603174603, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9758064516129032, "eval_PERSON_recall": 0.9453125, "eval_loss": 0.08986053615808487, "eval_overall_accuracy": 0.9889338731443995, "eval_overall_f1": 0.9465648854961832, "eval_overall_precision": 0.9441624365482234, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2751, "eval_samples_per_second": 617.952, "eval_steps_per_second": 10.905, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.011161034926772118, "learning_rate": 2.5500000000000003e-05, "loss": 0.001, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9452449567723343, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9704142011834319, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9682539682539683, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9838709677419355, "eval_PERSON_recall": 0.953125, "eval_loss": 0.11230127513408661, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9473684210526317, "eval_overall_precision": 0.9534883720930233, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2762, "eval_samples_per_second": 615.553, "eval_steps_per_second": 10.863, "step": 4704 }, { "epoch": 50.0, "grad_norm": 0.007018107455223799, "learning_rate": 2.5e-05, "loss": 0.0007, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9405099150141643, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9485714285714286, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09373489022254944, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9476372924648786, "eval_overall_precision": 0.948849104859335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2788, "eval_samples_per_second": 609.778, "eval_steps_per_second": 10.761, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.0013735599350184202, "learning_rate": 2.45e-05, "loss": 0.0011, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.9385474860335195, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9032258064516129, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9329608938547486, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9277777777777778, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.10816428065299988, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9481668773704172, "eval_overall_precision": 0.9398496240601504, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2867, "eval_samples_per_second": 593.04, "eval_steps_per_second": 10.465, "step": 4896 }, { "epoch": 52.0, "grad_norm": 2.9921071529388428, "learning_rate": 2.4e-05, "loss": 0.0015, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9394812680115273, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9644970414201184, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.11124741286039352, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9460154241645244, "eval_overall_precision": 0.9533678756476683, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2821, "eval_samples_per_second": 602.698, "eval_steps_per_second": 10.636, "step": 4992 }, { "epoch": 53.0, "grad_norm": 0.00152446492575109, "learning_rate": 2.35e-05, "loss": 0.0009, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9444444444444444, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9340659340659341, "eval_ORGANIZATION_recall": 0.9550561797752809, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10318152606487274, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9494949494949495, "eval_overall_precision": 0.94, "eval_overall_recall": 0.9591836734693877, "eval_runtime": 0.2786, "eval_samples_per_second": 610.124, "eval_steps_per_second": 10.767, "step": 5088 }, { "epoch": 54.0, "grad_norm": 1.4761940240859985, "learning_rate": 2.3000000000000003e-05, "loss": 0.0033, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.942857142857143, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9593023255813954, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.11812768876552582, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9488491048593349, "eval_overall_precision": 0.9512820512820512, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2746, "eval_samples_per_second": 619.118, "eval_steps_per_second": 10.926, "step": 5184 }, { "epoch": 55.0, "grad_norm": 0.4280019700527191, "learning_rate": 2.25e-05, "loss": 0.0008, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9485714285714285, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9651162790697675, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.12067463994026184, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.951530612244898, "eval_overall_precision": 0.951530612244898, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2807, "eval_samples_per_second": 605.66, "eval_steps_per_second": 10.688, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.0007758406572975218, "learning_rate": 2.2000000000000003e-05, "loss": 0.0009, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9060773480662985, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8631578947368421, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9421965317919075, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9702380952380952, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.13788987696170807, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9435897435897437, "eval_overall_precision": 0.9484536082474226, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2774, "eval_samples_per_second": 612.89, "eval_steps_per_second": 10.816, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.0007105050608515739, "learning_rate": 2.15e-05, "loss": 0.001, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9512893982808023, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9707602339181286, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.11200056970119476, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9526248399487837, "eval_overall_precision": 0.9562982005141388, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2759, "eval_samples_per_second": 616.121, "eval_steps_per_second": 10.873, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.0010843342170119286, "learning_rate": 2.1e-05, "loss": 0.0013, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9375000000000001, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9482758620689655, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.1086345985531807, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9465648854961832, "eval_overall_precision": 0.9441624365482234, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2775, "eval_samples_per_second": 612.711, "eval_steps_per_second": 10.813, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.0003581370983738452, "learning_rate": 2.05e-05, "loss": 0.0005, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.865979381443299, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9333333333333333, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9640718562874252, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9612403100775193, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9538461538461539, "eval_PERSON_recall": 0.96875, "eval_loss": 0.12184558063745499, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9389312977099236, "eval_overall_precision": 0.9365482233502538, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2757, "eval_samples_per_second": 616.564, "eval_steps_per_second": 10.881, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.0011471403995528817, "learning_rate": 2e-05, "loss": 0.0007, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9392265193370166, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9239130434782609, "eval_ORGANIZATION_recall": 0.9550561797752809, "eval_PERSON_f1": 0.9682539682539683, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9838709677419355, "eval_PERSON_recall": 0.953125, "eval_loss": 0.09577618539333344, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9444444444444445, "eval_overall_precision": 0.935, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2772, "eval_samples_per_second": 613.208, "eval_steps_per_second": 10.821, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.1609802097082138, "learning_rate": 1.9500000000000003e-05, "loss": 0.0002, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.942857142857143, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9593023255813954, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9644268774703557, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.976, "eval_PERSON_recall": 0.953125, "eval_loss": 0.10759799927473068, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9437340153452686, "eval_overall_precision": 0.9461538461538461, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.278, "eval_samples_per_second": 611.566, "eval_steps_per_second": 10.792, "step": 5856 }, { "epoch": 62.0, "grad_norm": 12.48816204071045, "learning_rate": 1.9e-05, "loss": 0.0023, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9497206703910613, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9139784946236559, "eval_LOCATION_recall": 0.9883720930232558, "eval_ORGANIZATION_f1": 0.949438202247191, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.949438202247191, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08770798146724701, "eval_overall_accuracy": 0.9894736842105263, "eval_overall_f1": 0.9569620253164557, "eval_overall_precision": 0.949748743718593, "eval_overall_recall": 0.9642857142857143, "eval_runtime": 0.2765, "eval_samples_per_second": 614.816, "eval_steps_per_second": 10.85, "step": 5952 }, { "epoch": 63.0, "grad_norm": 0.0009683805401436985, "learning_rate": 1.85e-05, "loss": 0.0013, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9385474860335195, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9032258064516129, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9526462395543176, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9447513812154696, "eval_ORGANIZATION_recall": 0.9606741573033708, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08852725476026535, "eval_overall_accuracy": 0.9894736842105263, "eval_overall_f1": 0.9570707070707071, "eval_overall_precision": 0.9475, "eval_overall_recall": 0.9668367346938775, "eval_runtime": 0.2742, "eval_samples_per_second": 620.036, "eval_steps_per_second": 10.942, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.0008049598545767367, "learning_rate": 1.8e-05, "loss": 0.0009, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9385474860335195, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9032258064516129, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9577464788732394, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.96045197740113, "eval_ORGANIZATION_recall": 0.9550561797752809, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08247757703065872, "eval_overall_accuracy": 0.9900134952766532, "eval_overall_f1": 0.9593908629441624, "eval_overall_precision": 0.9545454545454546, "eval_overall_recall": 0.9642857142857143, "eval_runtime": 0.2795, "eval_samples_per_second": 608.296, "eval_steps_per_second": 10.735, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.0007374592823907733, "learning_rate": 1.75e-05, "loss": 0.0003, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.951841359773371, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.96, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.0837675929069519, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9578544061302682, "eval_overall_precision": 0.959079283887468, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2762, "eval_samples_per_second": 615.537, "eval_steps_per_second": 10.862, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.0010951802833005786, "learning_rate": 1.7000000000000003e-05, "loss": 0.0006, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9385474860335195, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9032258064516129, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.951841359773371, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.96, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.09569484740495682, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9567430025445292, "eval_overall_precision": 0.9543147208121827, "eval_overall_recall": 0.9591836734693877, "eval_runtime": 0.2788, "eval_samples_per_second": 609.729, "eval_steps_per_second": 10.76, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.005136103834956884, "learning_rate": 1.65e-05, "loss": 0.0004, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9455587392550143, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9649122807017544, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.11290714144706726, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9514066496163682, "eval_overall_precision": 0.9538461538461539, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2755, "eval_samples_per_second": 617.088, "eval_steps_per_second": 10.89, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.0017895177006721497, "learning_rate": 1.6000000000000003e-05, "loss": 0.0003, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9333333333333332, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8936170212765957, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9401709401709402, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.953757225433526, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.1161164864897728, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9491094147582698, "eval_overall_precision": 0.9467005076142132, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2777, "eval_samples_per_second": 612.091, "eval_steps_per_second": 10.802, "step": 6528 }, { "epoch": 69.0, "grad_norm": 0.00024917226983234286, "learning_rate": 1.55e-05, "loss": 0.0002, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.9333333333333332, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8936170212765957, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9394812680115273, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9644970414201184, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.12338589131832123, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9476372924648786, "eval_overall_precision": 0.948849104859335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2765, "eval_samples_per_second": 614.72, "eval_steps_per_second": 10.848, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.002585263457149267, "learning_rate": 1.5e-05, "loss": 0.0006, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9491525423728814, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9485714285714285, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9651162790697675, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.11618266254663467, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9577464788732394, "eval_overall_precision": 0.961439588688946, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2791, "eval_samples_per_second": 609.103, "eval_steps_per_second": 10.749, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.11092416942119598, "learning_rate": 1.45e-05, "loss": 0.0002, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9545454545454545, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9333333333333333, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.951841359773371, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.96, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.11071506142616272, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9604086845466155, "eval_overall_precision": 0.9616368286445013, "eval_overall_recall": 0.9591836734693877, "eval_runtime": 0.2755, "eval_samples_per_second": 617.089, "eval_steps_per_second": 10.89, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.000154004359501414, "learning_rate": 1.4000000000000001e-05, "loss": 0.0002, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9491525423728814, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9488636363636365, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9597701149425287, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.1120668277144432, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9578544061302682, "eval_overall_precision": 0.959079283887468, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2766, "eval_samples_per_second": 614.651, "eval_steps_per_second": 10.847, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.00016238982789218426, "learning_rate": 1.3500000000000001e-05, "loss": 0.0002, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9491525423728814, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9461756373937678, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9542857142857143, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.11216197162866592, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9566326530612245, "eval_overall_precision": 0.9566326530612245, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.285, "eval_samples_per_second": 596.506, "eval_steps_per_second": 10.527, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.00033696964965201914, "learning_rate": 1.3000000000000001e-05, "loss": 0.0005, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9491525423728814, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9461756373937678, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9542857142857143, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.1126818060874939, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9566326530612245, "eval_overall_precision": 0.9566326530612245, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2829, "eval_samples_per_second": 600.942, "eval_steps_per_second": 10.605, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.0003657756024040282, "learning_rate": 1.25e-05, "loss": 0.0004, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9438202247191011, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9130434782608695, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9431818181818182, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9540229885057471, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.11697709560394287, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9516539440203563, "eval_overall_precision": 0.949238578680203, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2786, "eval_samples_per_second": 610.145, "eval_steps_per_second": 10.767, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.00010258240217808634, "learning_rate": 1.2e-05, "loss": 0.0003, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9545454545454545, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9333333333333333, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9497206703910613, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.9550561797752809, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10894415527582169, "eval_overall_accuracy": 0.9892037786774629, "eval_overall_f1": 0.9568527918781725, "eval_overall_precision": 0.952020202020202, "eval_overall_recall": 0.9617346938775511, "eval_runtime": 0.2762, "eval_samples_per_second": 615.53, "eval_steps_per_second": 10.862, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.0007157445070333779, "learning_rate": 1.1500000000000002e-05, "loss": 0.001, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9491525423728814, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9582172701949861, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9502762430939227, "eval_ORGANIZATION_recall": 0.9662921348314607, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.10824745148420334, "eval_overall_accuracy": 0.9894736842105263, "eval_overall_f1": 0.9608091024020227, "eval_overall_precision": 0.9523809523809523, "eval_overall_recall": 0.9693877551020408, "eval_runtime": 0.2772, "eval_samples_per_second": 613.187, "eval_steps_per_second": 10.821, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.013988692313432693, "learning_rate": 1.1000000000000001e-05, "loss": 0.0012, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9355742296918768, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9329608938547486, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.10093524307012558, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9456384323640962, "eval_overall_precision": 0.9373433583959899, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.278, "eval_samples_per_second": 611.532, "eval_steps_per_second": 10.792, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.0004660775884985924, "learning_rate": 1.05e-05, "loss": 0.0002, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9060773480662985, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8631578947368421, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9435028248587571, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9488636363636364, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9644268774703557, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.976, "eval_PERSON_recall": 0.953125, "eval_loss": 0.10507776588201523, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9416243654822335, "eval_overall_precision": 0.9368686868686869, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2738, "eval_samples_per_second": 620.982, "eval_steps_per_second": 10.959, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.0003681587695609778, "learning_rate": 1e-05, "loss": 0.0002, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9111111111111112, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8723404255319149, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9431818181818182, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9540229885057471, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9644268774703557, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.976, "eval_PERSON_recall": 0.953125, "eval_loss": 0.11081729084253311, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9426751592356687, "eval_overall_precision": 0.9414758269720102, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2753, "eval_samples_per_second": 617.505, "eval_steps_per_second": 10.897, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.0002929773472715169, "learning_rate": 9.5e-06, "loss": 0.0005, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9461756373937678, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9542857142857143, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10365120321512222, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9477707006369427, "eval_overall_precision": 0.9465648854961832, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2735, "eval_samples_per_second": 621.671, "eval_steps_per_second": 10.971, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.00024136666615959257, "learning_rate": 9e-06, "loss": 0.0003, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9050279329608939, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8709677419354839, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9431818181818182, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9540229885057471, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9644268774703557, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.976, "eval_PERSON_recall": 0.953125, "eval_loss": 0.1031210720539093, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9413265306122449, "eval_overall_precision": 0.9413265306122449, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2763, "eval_samples_per_second": 615.215, "eval_steps_per_second": 10.857, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.00017047034634742886, "learning_rate": 8.500000000000002e-06, "loss": 0.0003, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9378531073446328, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9120879120879121, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9548022598870057, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9602272727272727, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09963062405586243, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9541984732824428, "eval_overall_precision": 0.9517766497461929, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2758, "eval_samples_per_second": 616.456, "eval_steps_per_second": 10.879, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.0007244854350574315, "learning_rate": 8.000000000000001e-06, "loss": 0.0002, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9548022598870057, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9602272727272727, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09869366884231567, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9554140127388535, "eval_overall_precision": 0.9541984732824428, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2758, "eval_samples_per_second": 616.49, "eval_steps_per_second": 10.879, "step": 8064 }, { "epoch": 85.0, "grad_norm": 0.00027018680702894926, "learning_rate": 7.5e-06, "loss": 0.0004, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9548022598870057, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9602272727272727, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10174024105072021, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9554140127388535, "eval_overall_precision": 0.9541984732824428, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2746, "eval_samples_per_second": 618.994, "eval_steps_per_second": 10.923, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.00015681206423323601, "learning_rate": 7.000000000000001e-06, "loss": 0.0002, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9548022598870057, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9602272727272727, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10175755620002747, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9554140127388535, "eval_overall_precision": 0.9541984732824428, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2831, "eval_samples_per_second": 600.454, "eval_steps_per_second": 10.596, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.00022313217050395906, "learning_rate": 6.5000000000000004e-06, "loss": 0.0001, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.957983193277311, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9553072625698324, "eval_ORGANIZATION_recall": 0.9606741573033708, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10168375074863434, "eval_overall_accuracy": 0.9889338731443995, "eval_overall_f1": 0.9568527918781725, "eval_overall_precision": 0.952020202020202, "eval_overall_recall": 0.9617346938775511, "eval_runtime": 0.2755, "eval_samples_per_second": 617.136, "eval_steps_per_second": 10.891, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.0003491460520308465, "learning_rate": 6e-06, "loss": 0.0002, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9548022598870057, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9602272727272727, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10284104943275452, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9554140127388535, "eval_overall_precision": 0.9541984732824428, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2864, "eval_samples_per_second": 593.603, "eval_steps_per_second": 10.475, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.00023058451188262552, "learning_rate": 5.500000000000001e-06, "loss": 0.0001, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9548022598870057, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9602272727272727, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10325466841459274, "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9554140127388535, "eval_overall_precision": 0.9541984732824428, "eval_overall_recall": 0.9566326530612245, "eval_runtime": 0.2769, "eval_samples_per_second": 613.882, "eval_steps_per_second": 10.833, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.00042677970486693084, "learning_rate": 5e-06, "loss": 0.0002, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9371428571428573, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9213483146067416, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9491525423728814, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.1025572419166565, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9527458492975734, "eval_overall_precision": 0.9539641943734015, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2749, "eval_samples_per_second": 618.381, "eval_steps_per_second": 10.913, "step": 8640 }, { "epoch": 91.0, "grad_norm": 0.00043625899706967175, "learning_rate": 4.5e-06, "loss": 0.0002, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9371428571428573, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9213483146067416, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9491525423728814, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10244476050138474, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9527458492975734, "eval_overall_precision": 0.9539641943734015, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2764, "eval_samples_per_second": 615.112, "eval_steps_per_second": 10.855, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.0001665508607402444, "learning_rate": 4.000000000000001e-06, "loss": 0.0002, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9371428571428573, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9213483146067416, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9491525423728814, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10252244770526886, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9527458492975734, "eval_overall_precision": 0.9539641943734015, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2809, "eval_samples_per_second": 605.297, "eval_steps_per_second": 10.682, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.0002152398374164477, "learning_rate": 3.5000000000000004e-06, "loss": 0.0002, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9371428571428573, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9213483146067416, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9491525423728814, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10386810451745987, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9527458492975734, "eval_overall_precision": 0.9539641943734015, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2851, "eval_samples_per_second": 596.31, "eval_steps_per_second": 10.523, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.00013109896099194884, "learning_rate": 3e-06, "loss": 0.0001, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9371428571428573, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9213483146067416, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9491525423728814, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10342691838741302, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9527458492975734, "eval_overall_precision": 0.9539641943734015, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2864, "eval_samples_per_second": 593.558, "eval_steps_per_second": 10.475, "step": 9024 }, { "epoch": 95.0, "grad_norm": 8.776304457569495e-05, "learning_rate": 2.5e-06, "loss": 0.0001, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9371428571428573, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9213483146067416, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9491525423728814, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9545454545454546, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.1035594493150711, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9527458492975734, "eval_overall_precision": 0.9539641943734015, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2771, "eval_samples_per_second": 613.457, "eval_steps_per_second": 10.826, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.00011784955131588504, "learning_rate": 2.0000000000000003e-06, "loss": 0.0001, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9401709401709402, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.953757225433526, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10866863280534744, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9464285714285714, "eval_overall_precision": 0.9464285714285714, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2796, "eval_samples_per_second": 608.08, "eval_steps_per_second": 10.731, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.0001753137621562928, "learning_rate": 1.5e-06, "loss": 0.0005, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9401709401709402, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.953757225433526, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10562655329704285, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9452229299363057, "eval_overall_precision": 0.9440203562340967, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2763, "eval_samples_per_second": 615.195, "eval_steps_per_second": 10.856, "step": 9312 }, { "epoch": 98.0, "grad_norm": 0.000347200024407357, "learning_rate": 1.0000000000000002e-06, "loss": 0.0003, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9401709401709402, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.953757225433526, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10447113960981369, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9452229299363057, "eval_overall_precision": 0.9440203562340967, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2767, "eval_samples_per_second": 614.345, "eval_steps_per_second": 10.841, "step": 9408 }, { "epoch": 99.0, "grad_norm": 0.0003261720994487405, "learning_rate": 5.000000000000001e-07, "loss": 0.0001, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9401709401709402, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.953757225433526, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10466174781322479, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9452229299363057, "eval_overall_precision": 0.9440203562340967, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2746, "eval_samples_per_second": 619.061, "eval_steps_per_second": 10.925, "step": 9504 }, { "epoch": 100.0, "grad_norm": 9.912410314427689e-05, "learning_rate": 0.0, "loss": 0.0002, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9401709401709402, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.953757225433526, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10467950254678726, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9452229299363057, "eval_overall_precision": 0.9440203562340967, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2862, "eval_samples_per_second": 593.895, "eval_steps_per_second": 10.48, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3867927199316004.0, "train_loss": 0.005292673466804748, "train_runtime": 863.5274, "train_samples_per_second": 177.296, "train_steps_per_second": 11.117 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3867927199316004.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }