{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 21190, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 1.952807928268051e-05, "loss": 0.2656, "step": 500 }, { "epoch": 0.24, "eval_LOC": { "f1": 0.6786599504277605, "number": 6349, "precision": 0.6891848002598246, "recall": 0.6684517246810522 }, "eval_MISC": { "f1": 0.29338446788111217, "number": 1602, "precision": 0.6322314049586777, "recall": 0.19101123595505617 }, "eval_NORP": { "f1": 0.7726153601779048, "number": 5690, "precision": 0.7048253876249819, "recall": 0.8548330404217926 }, "eval_PER": { "f1": 0.848227848101266, "number": 16183, "precision": 0.8693001232405786, "recall": 0.8281530000617933 }, "eval_loss": 0.1907876580953598, "eval_overall_accuracy": 0.9462788899739584, "eval_overall_f1": 0.7762656505171475, "eval_overall_precision": 0.7878453038674034, "eval_overall_recall": 0.7650214592274678, "eval_runtime": 36.0943, "eval_samples_per_second": 117.359, "eval_steps_per_second": 14.684, "step": 500 }, { "epoch": 0.47, "learning_rate": 1.905615856536102e-05, "loss": 0.1899, "step": 1000 }, { "epoch": 0.47, "eval_LOC": { "f1": 0.7273017902813299, "number": 6349, "precision": 0.738276813240305, "recall": 0.7166482910694597 }, "eval_MISC": { "f1": 0.45315161839863716, "number": 1602, "precision": 0.7131367292225201, "recall": 0.33208489388264667 }, "eval_NORP": { "f1": 0.8094145636842554, "number": 5690, "precision": 0.7835170258266162, "recall": 0.8370826010544815 }, "eval_PER": { "f1": 0.8706320857275974, "number": 16183, "precision": 0.8752810392205845, "recall": 0.8660322560711858 }, "eval_loss": 0.1607111096382141, "eval_overall_accuracy": 0.9542287190755209, "eval_overall_f1": 0.811233510131919, "eval_overall_precision": 0.8227586206896552, "eval_overall_recall": 0.8000268240343348, "eval_runtime": 33.8998, "eval_samples_per_second": 124.956, "eval_steps_per_second": 15.634, "step": 1000 }, { "epoch": 0.71, "learning_rate": 1.858423784804153e-05, "loss": 0.1609, "step": 1500 }, { "epoch": 0.71, "eval_LOC": { "f1": 0.7361812892359516, "number": 6349, "precision": 0.7162222553254879, "recall": 0.7572846117498818 }, "eval_MISC": { "f1": 0.5344885038320558, "number": 1602, "precision": 0.5732666190135811, "recall": 0.5006242197253433 }, "eval_NORP": { "f1": 0.8192258613356019, "number": 5690, "precision": 0.7938994229183842, "recall": 0.8462214411247804 }, "eval_PER": { "f1": 0.8834497380892989, "number": 16183, "precision": 0.8916231355025489, "recall": 0.8754248285237595 }, "eval_loss": 0.14290471374988556, "eval_overall_accuracy": 0.9570109049479166, "eval_overall_f1": 0.8212389380530973, "eval_overall_precision": 0.8179338787999734, "eval_overall_recall": 0.8245708154506438, "eval_runtime": 33.8876, "eval_samples_per_second": 125.001, "eval_steps_per_second": 15.64, "step": 1500 }, { "epoch": 0.94, "learning_rate": 1.8112317130722038e-05, "loss": 0.1468, "step": 2000 }, { "epoch": 0.94, "eval_LOC": { "f1": 0.7594586281144263, "number": 6349, "precision": 0.7419984973703982, "recall": 0.7777602772090093 }, "eval_MISC": { "f1": 0.5709592641261497, "number": 1602, "precision": 0.602635228848821, "recall": 0.5424469413233458 }, "eval_NORP": { "f1": 0.8396145241782826, "number": 5690, "precision": 0.8224881995954147, "recall": 0.8574692442882249 }, "eval_PER": { "f1": 0.8936196585169348, "number": 16183, "precision": 0.8962580805569369, "recall": 0.8909967249582895 }, "eval_loss": 0.13329361379146576, "eval_overall_accuracy": 0.9607187906901041, "eval_overall_f1": 0.8376570294122554, "eval_overall_precision": 0.8335823621210612, "eval_overall_recall": 0.8417717274678111, "eval_runtime": 35.3996, "eval_samples_per_second": 119.662, "eval_steps_per_second": 14.972, "step": 2000 }, { "epoch": 1.18, "learning_rate": 1.7640396413402548e-05, "loss": 0.1227, "step": 2500 }, { "epoch": 1.18, "eval_LOC": { "f1": 0.7548371685643762, "number": 6349, "precision": 0.7632850241545893, "recall": 0.746574263663569 }, "eval_MISC": { "f1": 0.6036542515811666, "number": 1602, "precision": 0.6905144694533762, "recall": 0.5362047440699126 }, "eval_NORP": { "f1": 0.8488587291795189, "number": 5690, "precision": 0.8513346296623652, "recall": 0.8463971880492092 }, "eval_PER": { "f1": 0.895450291338345, "number": 16183, "precision": 0.8754427390791027, "recall": 0.9163937465241303 }, "eval_loss": 0.1358954906463623, "eval_overall_accuracy": 0.9613240559895834, "eval_overall_f1": 0.8432567849686847, "eval_overall_precision": 0.8400718778077269, "eval_overall_recall": 0.8464659334763949, "eval_runtime": 33.6007, "eval_samples_per_second": 126.069, "eval_steps_per_second": 15.773, "step": 2500 }, { "epoch": 1.42, "learning_rate": 1.7168475696083062e-05, "loss": 0.1043, "step": 3000 }, { "epoch": 1.42, "eval_LOC": { "f1": 0.7524268013574303, "number": 6349, "precision": 0.7540335336918697, "recall": 0.7508269018743109 }, "eval_MISC": { "f1": 0.6292059219380888, "number": 1602, "precision": 0.6824817518248175, "recall": 0.583645443196005 }, "eval_NORP": { "f1": 0.8393367638650656, "number": 5690, "precision": 0.7840683656340607, "recall": 0.90298769771529 }, "eval_PER": { "f1": 0.9035958798495035, "number": 16183, "precision": 0.9019269839315397, "recall": 0.9052709633566088 }, "eval_loss": 0.1304474025964737, "eval_overall_accuracy": 0.96197509765625, "eval_overall_f1": 0.8452712561347658, "eval_overall_precision": 0.8360666491734453, "eval_overall_recall": 0.8546807939914163, "eval_runtime": 34.7702, "eval_samples_per_second": 121.828, "eval_steps_per_second": 15.243, "step": 3000 }, { "epoch": 1.65, "learning_rate": 1.669655497876357e-05, "loss": 0.1078, "step": 3500 }, { "epoch": 1.65, "eval_LOC": { "f1": 0.7776814563795412, "number": 6349, "precision": 0.7782334384858044, "recall": 0.7771302567333438 }, "eval_MISC": { "f1": 0.6092849881396137, "number": 1602, "precision": 0.6664195700518903, "recall": 0.5611735330836455 }, "eval_NORP": { "f1": 0.8464839881148893, "number": 5690, "precision": 0.7980080921257392, "recall": 0.9012302284710018 }, "eval_PER": { "f1": 0.9055894496826277, "number": 16183, "precision": 0.9031405568188802, "recall": 0.9080516591484892 }, "eval_loss": 0.12490628659725189, "eval_overall_accuracy": 0.9640248616536459, "eval_overall_f1": 0.8522172396611858, "eval_overall_precision": 0.8443362074639637, "eval_overall_recall": 0.8602467811158798, "eval_runtime": 35.4399, "eval_samples_per_second": 119.526, "eval_steps_per_second": 14.955, "step": 3500 }, { "epoch": 1.89, "learning_rate": 1.622463426144408e-05, "loss": 0.1041, "step": 4000 }, { "epoch": 1.89, "eval_LOC": { "f1": 0.7832865606485812, "number": 6349, "precision": 0.7754283068374749, "recall": 0.7913057174358167 }, "eval_MISC": { "f1": 0.6451395896400942, "number": 1602, "precision": 0.6994894237782641, "recall": 0.5986267166042447 }, "eval_NORP": { "f1": 0.8637246248896735, "number": 5690, "precision": 0.8675531914893617, "recall": 0.8599297012302285 }, "eval_PER": { "f1": 0.9092253651321871, "number": 16183, "precision": 0.9067666400344171, "recall": 0.9116974602978434 }, "eval_loss": 0.11825834214687347, "eval_overall_accuracy": 0.9657440185546875, "eval_overall_f1": 0.8602836284299741, "eval_overall_precision": 0.8611941803030813, "eval_overall_recall": 0.859375, "eval_runtime": 35.0652, "eval_samples_per_second": 120.804, "eval_steps_per_second": 15.115, "step": 4000 }, { "epoch": 2.12, "learning_rate": 1.575271354412459e-05, "loss": 0.0799, "step": 4500 }, { "epoch": 2.12, "eval_LOC": { "f1": 0.7903951172502409, "number": 6349, "precision": 0.8063247583155825, "recall": 0.7750826901874311 }, "eval_MISC": { "f1": 0.6561734872332983, "number": 1602, "precision": 0.7462211614956245, "recall": 0.5855181023720349 }, "eval_NORP": { "f1": 0.8658915416703648, "number": 5690, "precision": 0.8746637977407208, "recall": 0.8572934973637961 }, "eval_PER": { "f1": 0.912650510125923, "number": 16183, "precision": 0.9050802139037433, "recall": 0.9203485138725823 }, "eval_loss": 0.13416212797164917, "eval_overall_accuracy": 0.9664764404296875, "eval_overall_f1": 0.8656635763378759, "eval_overall_precision": 0.8720103425985779, "eval_overall_recall": 0.8594085300429185, "eval_runtime": 34.7392, "eval_samples_per_second": 121.937, "eval_steps_per_second": 15.257, "step": 4500 }, { "epoch": 2.36, "learning_rate": 1.52807928268051e-05, "loss": 0.0679, "step": 5000 }, { "epoch": 2.36, "eval_LOC": { "f1": 0.7919011882426517, "number": 6349, "precision": 0.7861244761756946, "recall": 0.7977634273113876 }, "eval_MISC": { "f1": 0.6594274432379073, "number": 1602, "precision": 0.697286012526096, "recall": 0.6254681647940075 }, "eval_NORP": { "f1": 0.8735855728429986, "number": 5690, "precision": 0.8788687299893276, "recall": 0.868365553602812 }, "eval_PER": { "f1": 0.9156303340648623, "number": 16183, "precision": 0.9034586466165414, "recall": 0.9281344620898474 }, "eval_loss": 0.1265156865119934, "eval_overall_accuracy": 0.9679005940755209, "eval_overall_f1": 0.868309119113943, "eval_overall_precision": 0.8639426428120954, "eval_overall_recall": 0.872719957081545, "eval_runtime": 34.8379, "eval_samples_per_second": 121.592, "eval_steps_per_second": 15.213, "step": 5000 }, { "epoch": 2.6, "learning_rate": 1.4808872109485608e-05, "loss": 0.0652, "step": 5500 }, { "epoch": 2.6, "eval_LOC": { "f1": 0.79075180386376, "number": 6349, "precision": 0.7792048929663609, "recall": 0.802646085997795 }, "eval_MISC": { "f1": 0.6546880104541, "number": 1602, "precision": 0.686771761480466, "recall": 0.6254681647940075 }, "eval_NORP": { "f1": 0.8732272570044967, "number": 5690, "precision": 0.8595505617977528, "recall": 0.8873462214411247 }, "eval_PER": { "f1": 0.9123680851726177, "number": 16183, "precision": 0.9193224592220828, "recall": 0.9055181363158871 }, "eval_loss": 0.12422272562980652, "eval_overall_accuracy": 0.9671122233072916, "eval_overall_f1": 0.8652682059795094, "eval_overall_precision": 0.865427833495455, "eval_overall_recall": 0.8651086373390557, "eval_runtime": 34.0635, "eval_samples_per_second": 124.356, "eval_steps_per_second": 15.559, "step": 5500 }, { "epoch": 2.83, "learning_rate": 1.4336951392166117e-05, "loss": 0.0695, "step": 6000 }, { "epoch": 2.83, "eval_LOC": { "f1": 0.7890688921616758, "number": 6349, "precision": 0.8210454622850332, "recall": 0.759489683414711 }, "eval_MISC": { "f1": 0.6770870337477797, "number": 1602, "precision": 0.7856553998351196, "recall": 0.5948813982521848 }, "eval_NORP": { "f1": 0.8647034425549563, "number": 5690, "precision": 0.81885310290652, "recall": 0.9159929701230228 }, "eval_PER": { "f1": 0.9161746617466175, "number": 16183, "precision": 0.9118565220052641, "recall": 0.9205338935920411 }, "eval_loss": 0.1276456117630005, "eval_overall_accuracy": 0.9684855143229166, "eval_overall_f1": 0.8684157552170704, "eval_overall_precision": 0.8689405129582383, "eval_overall_recall": 0.8678916309012875, "eval_runtime": 35.0576, "eval_samples_per_second": 120.83, "eval_steps_per_second": 15.118, "step": 6000 }, { "epoch": 3.07, "learning_rate": 1.3865030674846627e-05, "loss": 0.0573, "step": 6500 }, { "epoch": 3.07, "eval_LOC": { "f1": 0.7923842629167325, "number": 6349, "precision": 0.7948961800602314, "recall": 0.7898881713655694 }, "eval_MISC": { "f1": 0.6901140684410647, "number": 1602, "precision": 0.7007722007722008, "recall": 0.6797752808988764 }, "eval_NORP": { "f1": 0.8771058698350772, "number": 5690, "precision": 0.8851109520400859, "recall": 0.869244288224956 }, "eval_PER": { "f1": 0.9171229698375871, "number": 16183, "precision": 0.9182876966918597, "recall": 0.9159611938453933 }, "eval_loss": 0.1303853839635849, "eval_overall_accuracy": 0.967529296875, "eval_overall_f1": 0.8708955349479105, "eval_overall_precision": 0.8742945966951644, "eval_overall_recall": 0.8675228004291845, "eval_runtime": 36.8238, "eval_samples_per_second": 115.034, "eval_steps_per_second": 14.393, "step": 6500 }, { "epoch": 3.3, "learning_rate": 1.3393109957527137e-05, "loss": 0.0444, "step": 7000 }, { "epoch": 3.3, "eval_LOC": { "f1": 0.7984538074990337, "number": 6349, "precision": 0.7840874582447617, "recall": 0.8133564340841077 }, "eval_MISC": { "f1": 0.6787416587225928, "number": 1602, "precision": 0.6912621359223301, "recall": 0.6666666666666666 }, "eval_NORP": { "f1": 0.8763066202090593, "number": 5690, "precision": 0.8687392055267703, "recall": 0.8840070298769771 }, "eval_PER": { "f1": 0.915957745162195, "number": 16183, "precision": 0.9238747799849133, "recall": 0.9081752456281282 }, "eval_loss": 0.13717159628868103, "eval_overall_accuracy": 0.96856689453125, "eval_overall_f1": 0.8703334283271588, "eval_overall_precision": 0.87026048476315, "eval_overall_recall": 0.8704063841201717, "eval_runtime": 33.9103, "eval_samples_per_second": 124.918, "eval_steps_per_second": 15.629, "step": 7000 }, { "epoch": 3.54, "learning_rate": 1.2921189240207645e-05, "loss": 0.0428, "step": 7500 }, { "epoch": 3.54, "eval_LOC": { "f1": 0.8001958064779311, "number": 6349, "precision": 0.8300609343263372, "recall": 0.7724051031658529 }, "eval_MISC": { "f1": 0.6906565656565657, "number": 1602, "precision": 0.698595146871009, "recall": 0.682896379525593 }, "eval_NORP": { "f1": 0.8765986279325824, "number": 5690, "precision": 0.8460029426189308, "recall": 0.9094903339191565 }, "eval_PER": { "f1": 0.9160423025930381, "number": 16183, "precision": 0.9141538461538462, "recall": 0.9179385775196194 }, "eval_loss": 0.13739602267742157, "eval_overall_accuracy": 0.9689229329427084, "eval_overall_f1": 0.8724712980809519, "eval_overall_precision": 0.8722227807379109, "eval_overall_recall": 0.872719957081545, "eval_runtime": 34.6373, "eval_samples_per_second": 122.296, "eval_steps_per_second": 15.301, "step": 7500 }, { "epoch": 3.78, "learning_rate": 1.2449268522888157e-05, "loss": 0.0406, "step": 8000 }, { "epoch": 3.78, "eval_LOC": { "f1": 0.8012557353296305, "number": 6349, "precision": 0.819394138952914, "recall": 0.7839029768467475 }, "eval_MISC": { "f1": 0.7026332691072574, "number": 1602, "precision": 0.7235449735449735, "recall": 0.682896379525593 }, "eval_NORP": { "f1": 0.8827633732448963, "number": 5690, "precision": 0.8656867714140902, "recall": 0.9005272407732865 }, "eval_PER": { "f1": 0.9211724791392853, "number": 16183, "precision": 0.9113947018265393, "recall": 0.931162330841006 }, "eval_loss": 0.13875727355480194, "eval_overall_accuracy": 0.9702097574869791, "eval_overall_f1": 0.8774702236773967, "eval_overall_precision": 0.8743300376177636, "eval_overall_recall": 0.8806330472103004, "eval_runtime": 34.5296, "eval_samples_per_second": 122.677, "eval_steps_per_second": 15.349, "step": 8000 }, { "epoch": 4.01, "learning_rate": 1.1977347805568667e-05, "loss": 0.0449, "step": 8500 }, { "epoch": 4.01, "eval_LOC": { "f1": 0.8041580701475957, "number": 6349, "precision": 0.8103310411002719, "recall": 0.7980784375492204 }, "eval_MISC": { "f1": 0.7114228456913828, "number": 1602, "precision": 0.7650862068965517, "recall": 0.6647940074906367 }, "eval_NORP": { "f1": 0.8826941390355814, "number": 5690, "precision": 0.8698174372973895, "recall": 0.8959578207381371 }, "eval_PER": { "f1": 0.9232705174636303, "number": 16183, "precision": 0.9171900725654003, "recall": 0.9294321201260582 }, "eval_loss": 0.13972027599811554, "eval_overall_accuracy": 0.9705251057942709, "eval_overall_f1": 0.8796731905774414, "eval_overall_precision": 0.8784818592208661, "eval_overall_recall": 0.8808677575107297, "eval_runtime": 35.3655, "eval_samples_per_second": 119.778, "eval_steps_per_second": 14.986, "step": 8500 }, { "epoch": 4.25, "learning_rate": 1.1505427088249175e-05, "loss": 0.0269, "step": 9000 }, { "epoch": 4.25, "eval_LOC": { "f1": 0.8022795630837423, "number": 6349, "precision": 0.8063643595863166, "recall": 0.7982359426681367 }, "eval_MISC": { "f1": 0.716551724137931, "number": 1602, "precision": 0.800462249614792, "recall": 0.6485642946317104 }, "eval_NORP": { "f1": 0.879294097489934, "number": 5690, "precision": 0.8577636637138559, "recall": 0.901933216168717 }, "eval_PER": { "f1": 0.9192701977056382, "number": 16183, "precision": 0.9079129753510516, "recall": 0.9309151578817277 }, "eval_loss": 0.1521347165107727, "eval_overall_accuracy": 0.9700419108072916, "eval_overall_f1": 0.8770484970741711, "eval_overall_precision": 0.8721774594648364, "eval_overall_recall": 0.8819742489270386, "eval_runtime": 35.7932, "eval_samples_per_second": 118.347, "eval_steps_per_second": 14.807, "step": 9000 }, { "epoch": 4.48, "learning_rate": 1.1033506370929685e-05, "loss": 0.0261, "step": 9500 }, { "epoch": 4.48, "eval_LOC": { "f1": 0.8101327290385238, "number": 6349, "precision": 0.8331946062926585, "recall": 0.7883131201764058 }, "eval_MISC": { "f1": 0.707196029776675, "number": 1602, "precision": 0.7028360049321825, "recall": 0.7116104868913857 }, "eval_NORP": { "f1": 0.8803396586084394, "number": 5690, "precision": 0.8682276533925825, "recall": 0.8927943760984183 }, "eval_PER": { "f1": 0.9222290263319044, "number": 16183, "precision": 0.9140013351945135, "recall": 0.9306061916826299 }, "eval_loss": 0.15231968462467194, "eval_overall_accuracy": 0.9703928629557291, "eval_overall_f1": 0.8793763904919624, "eval_overall_precision": 0.8774243081750509, "eval_overall_recall": 0.881337178111588, "eval_runtime": 35.4408, "eval_samples_per_second": 119.523, "eval_steps_per_second": 14.955, "step": 9500 }, { "epoch": 4.72, "learning_rate": 1.0561585653610193e-05, "loss": 0.0285, "step": 10000 }, { "epoch": 4.72, "eval_LOC": { "f1": 0.8007642338555598, "number": 6349, "precision": 0.7777612826603325, "recall": 0.8251693180028351 }, "eval_MISC": { "f1": 0.7110076488194212, "number": 1602, "precision": 0.7608540925266903, "recall": 0.66729088639201 }, "eval_NORP": { "f1": 0.8860268172194778, "number": 5690, "precision": 0.8894792773645058, "recall": 0.8826010544815466 }, "eval_PER": { "f1": 0.9219896875947831, "number": 16183, "precision": 0.9054029904092452, "recall": 0.9391954520175493 }, "eval_loss": 0.14308173954486847, "eval_overall_accuracy": 0.9701334635416666, "eval_overall_f1": 0.8784727971124872, "eval_overall_precision": 0.8676980440897495, "eval_overall_recall": 0.889518508583691, "eval_runtime": 33.8885, "eval_samples_per_second": 124.998, "eval_steps_per_second": 15.64, "step": 10000 }, { "epoch": 4.96, "learning_rate": 1.0089664936290703e-05, "loss": 0.0289, "step": 10500 }, { "epoch": 4.96, "eval_LOC": { "f1": 0.8048566179407302, "number": 6349, "precision": 0.8165316045380875, "recall": 0.7935107891006458 }, "eval_MISC": { "f1": 0.718897108271688, "number": 1602, "precision": 0.7791545189504373, "recall": 0.66729088639201 }, "eval_NORP": { "f1": 0.8847718812224049, "number": 5690, "precision": 0.8718648694761986, "recall": 0.8980667838312829 }, "eval_PER": { "f1": 0.9236515297706123, "number": 16183, "precision": 0.9305154897780008, "recall": 0.9168880924426868 }, "eval_loss": 0.1427399069070816, "eval_overall_accuracy": 0.970794677734375, "eval_overall_f1": 0.8806381288763456, "eval_overall_precision": 0.8877644894204232, "eval_overall_recall": 0.8736252682403434, "eval_runtime": 35.6412, "eval_samples_per_second": 118.851, "eval_steps_per_second": 14.87, "step": 10500 }, { "epoch": 5.19, "learning_rate": 9.617744218971213e-06, "loss": 0.0238, "step": 11000 }, { "epoch": 5.19, "eval_LOC": { "f1": 0.8100460975997457, "number": 6349, "precision": 0.8175838280121932, "recall": 0.802646085997795 }, "eval_MISC": { "f1": 0.7157343801112201, "number": 1602, "precision": 0.7518900343642612, "recall": 0.682896379525593 }, "eval_NORP": { "f1": 0.8887525203822214, "number": 5690, "precision": 0.8866538394262725, "recall": 0.8908611599297013 }, "eval_PER": { "f1": 0.9244720420521962, "number": 16183, "precision": 0.919806704183998, "recall": 0.9291849471667799 }, "eval_loss": 0.15089917182922363, "eval_overall_accuracy": 0.9715423583984375, "eval_overall_f1": 0.8827567685516223, "eval_overall_precision": 0.8838100359627601, "eval_overall_recall": 0.881706008583691, "eval_runtime": 34.4219, "eval_samples_per_second": 123.061, "eval_steps_per_second": 15.397, "step": 11000 }, { "epoch": 5.43, "learning_rate": 9.145823501651723e-06, "loss": 0.0195, "step": 11500 }, { "epoch": 5.43, "eval_LOC": { "f1": 0.8136754861332485, "number": 6349, "precision": 0.8235199225681562, "recall": 0.8040636320680422 }, "eval_MISC": { "f1": 0.7222403126017584, "number": 1602, "precision": 0.7549353301565691, "recall": 0.6922596754057428 }, "eval_NORP": { "f1": 0.888985557682269, "number": 5690, "precision": 0.8802549965541007, "recall": 0.8978910369068541 }, "eval_PER": { "f1": 0.9241790677653425, "number": 16183, "precision": 0.9197111559880057, "recall": 0.9286906012482234 }, "eval_loss": 0.16119226813316345, "eval_overall_accuracy": 0.9717254638671875, "eval_overall_f1": 0.8837466673373913, "eval_overall_precision": 0.883909703820481, "eval_overall_recall": 0.8835836909871244, "eval_runtime": 34.047, "eval_samples_per_second": 124.416, "eval_steps_per_second": 15.567, "step": 11500 }, { "epoch": 5.66, "learning_rate": 8.673902784332233e-06, "loss": 0.0218, "step": 12000 }, { "epoch": 5.66, "eval_LOC": { "f1": 0.7971036820212603, "number": 6349, "precision": 0.7800391979496457, "recall": 0.8149314852732714 }, "eval_MISC": { "f1": 0.7200520833333334, "number": 1602, "precision": 0.7523809523809524, "recall": 0.6903870162297129 }, "eval_NORP": { "f1": 0.8812952705581594, "number": 5690, "precision": 0.8554177005789909, "recall": 0.9087873462214411 }, "eval_PER": { "f1": 0.9216090609707305, "number": 16183, "precision": 0.9180770174147658, "recall": 0.9251683865785083 }, "eval_loss": 0.16442249715328217, "eval_overall_accuracy": 0.9700113932291666, "eval_overall_f1": 0.8766755142667552, "eval_overall_precision": 0.8675794588915157, "eval_overall_recall": 0.8859643240343348, "eval_runtime": 35.3444, "eval_samples_per_second": 119.849, "eval_steps_per_second": 14.995, "step": 12000 }, { "epoch": 5.9, "learning_rate": 8.201982067012742e-06, "loss": 0.0183, "step": 12500 }, { "epoch": 5.9, "eval_LOC": { "f1": 0.8106127001832523, "number": 6349, "precision": 0.8202192841019026, "recall": 0.8012285399275476 }, "eval_MISC": { "f1": 0.7068697544295928, "number": 1602, "precision": 0.7040247678018576, "recall": 0.7097378277153558 }, "eval_NORP": { "f1": 0.8847426002883555, "number": 5690, "precision": 0.8549418128175709, "recall": 0.9166959578207381 }, "eval_PER": { "f1": 0.9248411591507826, "number": 16183, "precision": 0.9277453053102848, "recall": 0.921955138107891 }, "eval_loss": 0.16248470544815063, "eval_overall_accuracy": 0.971343994140625, "eval_overall_f1": 0.8812516715699384, "eval_overall_precision": 0.8786666666666667, "eval_overall_recall": 0.8838519313304721, "eval_runtime": 34.2026, "eval_samples_per_second": 123.85, "eval_steps_per_second": 15.496, "step": 12500 }, { "epoch": 6.13, "learning_rate": 7.730061349693252e-06, "loss": 0.016, "step": 13000 }, { "epoch": 6.13, "eval_LOC": { "f1": 0.8088959491660047, "number": 6349, "precision": 0.8158948886396411, "recall": 0.8020160655221295 }, "eval_MISC": { "f1": 0.7173844702923609, "number": 1602, "precision": 0.7226092463584547, "recall": 0.712234706616729 }, "eval_NORP": { "f1": 0.8844587716321684, "number": 5690, "precision": 0.8548704493276484, "recall": 0.9161687170474516 }, "eval_PER": { "f1": 0.9247578842811025, "number": 16183, "precision": 0.9290837647352336, "recall": 0.9204721003522215 }, "eval_loss": 0.17019298672676086, "eval_overall_accuracy": 0.9712270100911459, "eval_overall_f1": 0.8813718109577583, "eval_overall_precision": 0.8795031885412841, "eval_overall_recall": 0.8832483905579399, "eval_runtime": 33.7485, "eval_samples_per_second": 125.517, "eval_steps_per_second": 15.704, "step": 13000 }, { "epoch": 6.37, "learning_rate": 7.258140632373761e-06, "loss": 0.0135, "step": 13500 }, { "epoch": 6.37, "eval_LOC": { "f1": 0.8114028274040702, "number": 6349, "precision": 0.8004597701149425, "recall": 0.8226492361001733 }, "eval_MISC": { "f1": 0.7318982387475539, "number": 1602, "precision": 0.7663934426229508, "recall": 0.700374531835206 }, "eval_NORP": { "f1": 0.883330448329583, "number": 5690, "precision": 0.870225102319236, "recall": 0.8968365553602812 }, "eval_PER": { "f1": 0.9247437955303124, "number": 16183, "precision": 0.923888237833837, "recall": 0.9256009392572453 }, "eval_loss": 0.16678640246391296, "eval_overall_accuracy": 0.9714457194010416, "eval_overall_f1": 0.8825179495742194, "eval_overall_precision": 0.8789662741967671, "eval_overall_recall": 0.8860984442060086, "eval_runtime": 33.9369, "eval_samples_per_second": 124.82, "eval_steps_per_second": 15.617, "step": 13500 }, { "epoch": 6.61, "learning_rate": 6.786219915054272e-06, "loss": 0.0127, "step": 14000 }, { "epoch": 6.61, "eval_LOC": { "f1": 0.8155262740117478, "number": 6349, "precision": 0.8220515282445191, "recall": 0.8091037958733659 }, "eval_MISC": { "f1": 0.7328699106256207, "number": 1602, "precision": 0.7801268498942917, "recall": 0.6910112359550562 }, "eval_NORP": { "f1": 0.885006518904824, "number": 5690, "precision": 0.8754944110060189, "recall": 0.8947275922671353 }, "eval_PER": { "f1": 0.9258804761320408, "number": 16183, "precision": 0.9193931639554012, "recall": 0.9324599888772168 }, "eval_loss": 0.1748281568288803, "eval_overall_accuracy": 0.9720052083333334, "eval_overall_f1": 0.8849631614199598, "eval_overall_precision": 0.8838975113727588, "eval_overall_recall": 0.8860313841201717, "eval_runtime": 34.1866, "eval_samples_per_second": 123.908, "eval_steps_per_second": 15.503, "step": 14000 }, { "epoch": 6.84, "learning_rate": 6.314299197734781e-06, "loss": 0.014, "step": 14500 }, { "epoch": 6.84, "eval_LOC": { "f1": 0.8157271435338702, "number": 6349, "precision": 0.817793256292544, "recall": 0.8136714443219405 }, "eval_MISC": { "f1": 0.7454485269778219, "number": 1602, "precision": 0.7935165609584214, "recall": 0.7028714107365793 }, "eval_NORP": { "f1": 0.8912159238424924, "number": 5690, "precision": 0.877919863597613, "recall": 0.9049209138840071 }, "eval_PER": { "f1": 0.9255973540230596, "number": 16183, "precision": 0.9260267194458189, "recall": 0.9251683865785083 }, "eval_loss": 0.17196723818778992, "eval_overall_accuracy": 0.9721476236979166, "eval_overall_f1": 0.8864463947107882, "eval_overall_precision": 0.8872652759582116, "eval_overall_recall": 0.8856290236051502, "eval_runtime": 35.3824, "eval_samples_per_second": 119.721, "eval_steps_per_second": 14.979, "step": 14500 }, { "epoch": 7.08, "learning_rate": 5.84237848041529e-06, "loss": 0.0118, "step": 15000 }, { "epoch": 7.08, "eval_LOC": { "f1": 0.8129576567971983, "number": 6349, "precision": 0.8217216411906677, "recall": 0.804378642305875 }, "eval_MISC": { "f1": 0.7294871794871794, "number": 1602, "precision": 0.7496706192358367, "recall": 0.7103620474406991 }, "eval_NORP": { "f1": 0.8883183568677792, "number": 5690, "precision": 0.8657214345287739, "recall": 0.9121265377855887 }, "eval_PER": { "f1": 0.9262795910221375, "number": 16183, "precision": 0.9205419260344196, "recall": 0.9320892294382994 }, "eval_loss": 0.18032006919384003, "eval_overall_accuracy": 0.9713643391927084, "eval_overall_f1": 0.8848810437452033, "eval_overall_precision": 0.8806203094906023, "eval_overall_recall": 0.8891832081545065, "eval_runtime": 36.5943, "eval_samples_per_second": 115.756, "eval_steps_per_second": 14.483, "step": 15000 }, { "epoch": 7.31, "learning_rate": 5.3704577630958e-06, "loss": 0.0103, "step": 15500 }, { "epoch": 7.31, "eval_LOC": { "f1": 0.8092074038917892, "number": 6349, "precision": 0.812807881773399, "recall": 0.8056386832572059 }, "eval_MISC": { "f1": 0.7470997679814386, "number": 1602, "precision": 0.7964664310954064, "recall": 0.7034956304619226 }, "eval_NORP": { "f1": 0.8900523560209423, "number": 5690, "precision": 0.8698204999161214, "recall": 0.9112478031634447 }, "eval_PER": { "f1": 0.9261114012719285, "number": 16183, "precision": 0.9209336429182451, "recall": 0.9313477105604647 }, "eval_loss": 0.17802822589874268, "eval_overall_accuracy": 0.9718373616536459, "eval_overall_f1": 0.8853806445146092, "eval_overall_precision": 0.8822706842017646, "eval_overall_recall": 0.8885126072961373, "eval_runtime": 34.0065, "eval_samples_per_second": 124.565, "eval_steps_per_second": 15.585, "step": 15500 }, { "epoch": 7.55, "learning_rate": 4.89853704577631e-06, "loss": 0.0081, "step": 16000 }, { "epoch": 7.55, "eval_LOC": { "f1": 0.8206599713055953, "number": 6349, "precision": 0.8307245441342586, "recall": 0.8108363521814459 }, "eval_MISC": { "f1": 0.7422750424448217, "number": 1602, "precision": 0.813849590469099, "recall": 0.6822721598002497 }, "eval_NORP": { "f1": 0.8894804637183341, "number": 5690, "precision": 0.8696893366918556, "recall": 0.9101933216168717 }, "eval_PER": { "f1": 0.9277675840978593, "number": 16183, "precision": 0.9183871163044136, "recall": 0.9373416548229624 }, "eval_loss": 0.1846681833267212, "eval_overall_accuracy": 0.9726308186848959, "eval_overall_f1": 0.8887291931278828, "eval_overall_precision": 0.8859456217512994, "eval_overall_recall": 0.8915303111587983, "eval_runtime": 35.2193, "eval_samples_per_second": 120.275, "eval_steps_per_second": 15.049, "step": 16000 }, { "epoch": 7.79, "learning_rate": 4.42661632845682e-06, "loss": 0.0098, "step": 16500 }, { "epoch": 7.79, "eval_LOC": { "f1": 0.8185487061802642, "number": 6349, "precision": 0.8225190839694656, "recall": 0.8146164750354387 }, "eval_MISC": { "f1": 0.7496598639455783, "number": 1602, "precision": 0.8236173393124065, "recall": 0.6878901373283396 }, "eval_NORP": { "f1": 0.8895378800893318, "number": 5690, "precision": 0.8699596774193549, "recall": 0.9100175746924429 }, "eval_PER": { "f1": 0.9280763899584168, "number": 16183, "precision": 0.925254882692544, "recall": 0.9309151578817277 }, "eval_loss": 0.18522000312805176, "eval_overall_accuracy": 0.9727986653645834, "eval_overall_f1": 0.8885798538971919, "eval_overall_precision": 0.8880442062960482, "eval_overall_recall": 0.8891161480686696, "eval_runtime": 33.8843, "eval_samples_per_second": 125.014, "eval_steps_per_second": 15.641, "step": 16500 }, { "epoch": 8.02, "learning_rate": 3.954695611137329e-06, "loss": 0.0078, "step": 17000 }, { "epoch": 8.02, "eval_LOC": { "f1": 0.8156406785060126, "number": 6349, "precision": 0.8249033505154639, "recall": 0.806583713970704 }, "eval_MISC": { "f1": 0.7453208556149733, "number": 1602, "precision": 0.802158273381295, "recall": 0.6960049937578028 }, "eval_NORP": { "f1": 0.8898399383719935, "number": 5690, "precision": 0.8673452361087935, "recall": 0.9135325131810194 }, "eval_PER": { "f1": 0.9259453619385708, "number": 16183, "precision": 0.9251171971379225, "recall": 0.926775010813817 }, "eval_loss": 0.19055458903312683, "eval_overall_accuracy": 0.9721832275390625, "eval_overall_f1": 0.886578227983967, "eval_overall_precision": 0.8868905814850854, "eval_overall_recall": 0.8862660944206009, "eval_runtime": 35.3527, "eval_samples_per_second": 119.821, "eval_steps_per_second": 14.992, "step": 17000 }, { "epoch": 8.26, "learning_rate": 3.482774893817839e-06, "loss": 0.0063, "step": 17500 }, { "epoch": 8.26, "eval_LOC": { "f1": 0.8183385579937305, "number": 6349, "precision": 0.8143815317423179, "recall": 0.8223342258623405 }, "eval_MISC": { "f1": 0.7419786096256684, "number": 1602, "precision": 0.7985611510791367, "recall": 0.6928838951310862 }, "eval_NORP": { "f1": 0.8902174854865263, "number": 5690, "precision": 0.8779695778499402, "recall": 0.9028119507908612 }, "eval_PER": { "f1": 0.926946847960445, "number": 16183, "precision": 0.927118748840947, "recall": 0.926775010813817 }, "eval_loss": 0.19118891656398773, "eval_overall_accuracy": 0.9723765055338541, "eval_overall_f1": 0.8873317352019178, "eval_overall_precision": 0.8872573669918535, "eval_overall_recall": 0.8874061158798283, "eval_runtime": 35.4944, "eval_samples_per_second": 119.343, "eval_steps_per_second": 14.932, "step": 17500 }, { "epoch": 8.49, "learning_rate": 3.0108541764983485e-06, "loss": 0.0059, "step": 18000 }, { "epoch": 8.49, "eval_LOC": { "f1": 0.8186487554326354, "number": 6349, "precision": 0.8214398985093562, "recall": 0.8158765159867696 }, "eval_MISC": { "f1": 0.7347328244274809, "number": 1602, "precision": 0.7490272373540856, "recall": 0.7209737827715356 }, "eval_NORP": { "f1": 0.8874354561101548, "number": 5690, "precision": 0.869477234401349, "recall": 0.9061511423550088 }, "eval_PER": { "f1": 0.9275050685015666, "number": 16183, "precision": 0.9221794636857858, "recall": 0.9328925415559538 }, "eval_loss": 0.19088691473007202, "eval_overall_accuracy": 0.9722646077473959, "eval_overall_f1": 0.886665666216464, "eval_overall_precision": 0.8818866297389631, "eval_overall_recall": 0.8914967811158798, "eval_runtime": 34.1683, "eval_samples_per_second": 123.975, "eval_steps_per_second": 15.511, "step": 18000 }, { "epoch": 8.73, "learning_rate": 2.5389334591788585e-06, "loss": 0.0059, "step": 18500 }, { "epoch": 8.73, "eval_LOC": { "f1": 0.8207652008255277, "number": 6349, "precision": 0.8273323731797088, "recall": 0.8143014647976059 }, "eval_MISC": { "f1": 0.7507487520798669, "number": 1602, "precision": 0.8039914468995011, "recall": 0.704119850187266 }, "eval_NORP": { "f1": 0.8878091106290672, "number": 5690, "precision": 0.8767780634104542, "recall": 0.8991212653778559 }, "eval_PER": { "f1": 0.9267059295118868, "number": 16183, "precision": 0.9248522895125554, "recall": 0.9285670147685843 }, "eval_loss": 0.19563351571559906, "eval_overall_accuracy": 0.9725545247395834, "eval_overall_f1": 0.8878926778488558, "eval_overall_precision": 0.8892214561964016, "eval_overall_recall": 0.886567864806867, "eval_runtime": 35.4075, "eval_samples_per_second": 119.636, "eval_steps_per_second": 14.969, "step": 18500 }, { "epoch": 8.97, "learning_rate": 2.0670127418593677e-06, "loss": 0.0069, "step": 19000 }, { "epoch": 8.97, "eval_LOC": { "f1": 0.8209361974991985, "number": 6349, "precision": 0.8358087155214624, "recall": 0.806583713970704 }, "eval_MISC": { "f1": 0.7553444180522565, "number": 1602, "precision": 0.8275092936802974, "recall": 0.6947565543071161 }, "eval_NORP": { "f1": 0.8875515818431913, "number": 5690, "precision": 0.8687310669808146, "recall": 0.9072056239015818 }, "eval_PER": { "f1": 0.9272833399895548, "number": 16183, "precision": 0.9220430107526881, "recall": 0.932583575356856 }, "eval_loss": 0.19536001980304718, "eval_overall_accuracy": 0.972808837890625, "eval_overall_f1": 0.8887695869543335, "eval_overall_precision": 0.8893962796319925, "eval_overall_recall": 0.8881437768240343, "eval_runtime": 35.4791, "eval_samples_per_second": 119.394, "eval_steps_per_second": 14.938, "step": 19000 }, { "epoch": 9.2, "learning_rate": 1.5950920245398775e-06, "loss": 0.006, "step": 19500 }, { "epoch": 9.2, "eval_LOC": { "f1": 0.8205210164615631, "number": 6349, "precision": 0.8327656123276561, "recall": 0.8086312805166168 }, "eval_MISC": { "f1": 0.7514871116986119, "number": 1602, "precision": 0.7984550561797753, "recall": 0.7097378277153558 }, "eval_NORP": { "f1": 0.8861538461538462, "number": 5690, "precision": 0.8625623960066556, "recall": 0.9110720562390158 }, "eval_PER": { "f1": 0.9265025233216089, "number": 16183, "precision": 0.9172722868217055, "recall": 0.9359204103071124 }, "eval_loss": 0.19643273949623108, "eval_overall_accuracy": 0.9723968505859375, "eval_overall_f1": 0.8876616334362226, "eval_overall_precision": 0.8834313041745542, "eval_overall_recall": 0.8919326716738197, "eval_runtime": 34.2373, "eval_samples_per_second": 123.725, "eval_steps_per_second": 15.48, "step": 19500 }, { "epoch": 9.44, "learning_rate": 1.123171307220387e-06, "loss": 0.0046, "step": 20000 }, { "epoch": 9.44, "eval_LOC": { "f1": 0.8190899001109878, "number": 6349, "precision": 0.8245810055865922, "recall": 0.8136714443219405 }, "eval_MISC": { "f1": 0.7499167499167498, "number": 1602, "precision": 0.8037116345467523, "recall": 0.7028714107365793 }, "eval_NORP": { "f1": 0.8883740883740884, "number": 5690, "precision": 0.8678960603520537, "recall": 0.909841827768014 }, "eval_PER": { "f1": 0.9276177525033996, "number": 16183, "precision": 0.9279045322450998, "recall": 0.927331149972193 }, "eval_loss": 0.19752554595470428, "eval_overall_accuracy": 0.9726409912109375, "eval_overall_f1": 0.8880391762259341, "eval_overall_precision": 0.8883371359549054, "eval_overall_recall": 0.8877414163090128, "eval_runtime": 35.5214, "eval_samples_per_second": 119.252, "eval_steps_per_second": 14.921, "step": 20000 }, { "epoch": 9.67, "learning_rate": 6.512505899008966e-07, "loss": 0.0038, "step": 20500 }, { "epoch": 9.67, "eval_LOC": { "f1": 0.8203611954610835, "number": 6349, "precision": 0.832603406326034, "recall": 0.8084737753977004 }, "eval_MISC": { "f1": 0.7467811158798283, "number": 1602, "precision": 0.7925718290119131, "recall": 0.7059925093632958 }, "eval_NORP": { "f1": 0.8882978723404255, "number": 5690, "precision": 0.8677505866577271, "recall": 0.909841827768014 }, "eval_PER": { "f1": 0.9284789444478698, "number": 16183, "precision": 0.9264226391879422, "recall": 0.9305443984428103 }, "eval_loss": 0.1979084610939026, "eval_overall_accuracy": 0.972808837890625, "eval_overall_f1": 0.8887100290088367, "eval_overall_precision": 0.8888739811491632, "eval_overall_recall": 0.8885461373390557, "eval_runtime": 34.2132, "eval_samples_per_second": 123.812, "eval_steps_per_second": 15.491, "step": 20500 }, { "epoch": 9.91, "learning_rate": 1.7932987258140632e-07, "loss": 0.0057, "step": 21000 }, { "epoch": 9.91, "eval_LOC": { "f1": 0.8203578528827038, "number": 6349, "precision": 0.8284612913588179, "recall": 0.8124114033706096 }, "eval_MISC": { "f1": 0.748, "number": 1602, "precision": 0.8025751072961373, "recall": 0.700374531835206 }, "eval_NORP": { "f1": 0.8893664574916187, "number": 5690, "precision": 0.8704358068315665, "recall": 0.9091388400702988 }, "eval_PER": { "f1": 0.9278439602493672, "number": 16183, "precision": 0.9268142302238116, "recall": 0.9288759809676821 }, "eval_loss": 0.19716496765613556, "eval_overall_accuracy": 0.9727630615234375, "eval_overall_f1": 0.8886092937426606, "eval_overall_precision": 0.8891761230108104, "eval_overall_recall": 0.8880431866952789, "eval_runtime": 34.0058, "eval_samples_per_second": 124.567, "eval_steps_per_second": 15.586, "step": 21000 } ], "max_steps": 21190, "num_train_epochs": 10, "total_flos": 8856293355753336.0, "trial_name": null, "trial_params": null }