{ "best_metric": 0.260405570268631, "best_model_checkpoint": "./checkpoint-huawei-noah/checkpoint-45000", "epoch": 1.9922082521692934, "eval_steps": 1000, "global_step": 45000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 13.771051406860352, "learning_rate": 1.9873510587163855e-05, "loss": 0.7093, "step": 1000 }, { "epoch": 0.04, "eval_LOC_f1": 0.6130136331478614, "eval_ORG_f1": 0.5179822369964004, "eval_PER_f1": 0.6815983074481896, "eval_loss": 0.5055311322212219, "eval_overall_accuracy": 0.8334903358124973, "eval_overall_f1": 0.6058789768206305, "eval_overall_precision": 0.5856612385653673, "eval_overall_recall": 0.6275425074138874, "eval_runtime": 974.5486, "eval_samples_per_second": 67.416, "eval_steps_per_second": 0.264, "step": 1000 }, { "epoch": 0.09, "grad_norm": 23.08890151977539, "learning_rate": 1.974702117432771e-05, "loss": 0.501, "step": 2000 }, { "epoch": 0.09, "eval_LOC_f1": 0.6875965698393078, "eval_ORG_f1": 0.5727174388363369, "eval_PER_f1": 0.7225324778200253, "eval_loss": 0.4574837386608124, "eval_overall_accuracy": 0.8565286056409621, "eval_overall_f1": 0.6654497292771804, "eval_overall_precision": 0.6761779246769729, "eval_overall_recall": 0.6550566427755974, "eval_runtime": 902.1725, "eval_samples_per_second": 72.824, "eval_steps_per_second": 0.285, "step": 2000 }, { "epoch": 0.13, "grad_norm": 20.49671745300293, "learning_rate": 1.9620531761491565e-05, "loss": 0.4505, "step": 3000 }, { "epoch": 0.13, "eval_LOC_f1": 0.7204635210044149, "eval_ORG_f1": 0.5725871273468899, "eval_PER_f1": 0.7433987684658955, "eval_loss": 0.4243098795413971, "eval_overall_accuracy": 0.8621471869792049, "eval_overall_f1": 0.6874798198516072, "eval_overall_precision": 0.6776606774900581, "eval_overall_recall": 0.6975877000398559, "eval_runtime": 876.2182, "eval_samples_per_second": 74.981, "eval_steps_per_second": 0.293, "step": 3000 }, { "epoch": 0.18, "grad_norm": 14.499987602233887, "learning_rate": 1.949404234865542e-05, "loss": 0.4268, "step": 4000 }, { "epoch": 0.18, "eval_LOC_f1": 0.7364175740245708, "eval_ORG_f1": 0.5861517860999524, "eval_PER_f1": 0.7518103620724146, "eval_loss": 0.4260491728782654, "eval_overall_accuracy": 0.8618999871026152, "eval_overall_f1": 0.6951561442521513, "eval_overall_precision": 0.6862764458838554, "eval_overall_recall": 0.7042686428026184, "eval_runtime": 900.7744, "eval_samples_per_second": 72.937, "eval_steps_per_second": 0.285, "step": 4000 }, { "epoch": 0.22, "grad_norm": 2.379225969314575, "learning_rate": 1.9367552935819272e-05, "loss": 0.4023, "step": 5000 }, { "epoch": 0.22, "eval_LOC_f1": 0.7258738398309293, "eval_ORG_f1": 0.6257472293742304, "eval_PER_f1": 0.7652099137004392, "eval_loss": 0.3998052477836609, "eval_overall_accuracy": 0.8690542423545061, "eval_overall_f1": 0.70826256852105, "eval_overall_precision": 0.7241968388877515, "eval_overall_recall": 0.6930143954388549, "eval_runtime": 879.0998, "eval_samples_per_second": 74.736, "eval_steps_per_second": 0.292, "step": 5000 }, { "epoch": 0.27, "grad_norm": 17.12338638305664, "learning_rate": 1.924106352298313e-05, "loss": 0.3733, "step": 6000 }, { "epoch": 0.27, "eval_LOC_f1": 0.7708459775529246, "eval_ORG_f1": 0.6382364519091983, "eval_PER_f1": 0.774621616895607, "eval_loss": 0.3535228669643402, "eval_overall_accuracy": 0.8847707173727775, "eval_overall_f1": 0.7347440116838627, "eval_overall_precision": 0.7320440692286544, "eval_overall_recall": 0.7374639438503577, "eval_runtime": 911.8464, "eval_samples_per_second": 72.052, "eval_steps_per_second": 0.282, "step": 6000 }, { "epoch": 0.31, "grad_norm": 45.46567153930664, "learning_rate": 1.9114574110146982e-05, "loss": 0.3636, "step": 7000 }, { "epoch": 0.31, "eval_LOC_f1": 0.7551567306422088, "eval_ORG_f1": 0.6445153971205689, "eval_PER_f1": 0.7953595339091327, "eval_loss": 0.36239349842071533, "eval_overall_accuracy": 0.8839519863237142, "eval_overall_f1": 0.7360533119495638, "eval_overall_precision": 0.7536363185945938, "eval_overall_recall": 0.7192720542041302, "eval_runtime": 934.2532, "eval_samples_per_second": 70.324, "eval_steps_per_second": 0.275, "step": 7000 }, { "epoch": 0.35, "grad_norm": 23.77419090270996, "learning_rate": 1.8988084697310836e-05, "loss": 0.3438, "step": 8000 }, { "epoch": 0.35, "eval_LOC_f1": 0.7777527075812274, "eval_ORG_f1": 0.6523009578392771, "eval_PER_f1": 0.7983586509404405, "eval_loss": 0.35361218452453613, "eval_overall_accuracy": 0.8877984416924404, "eval_overall_f1": 0.7468599888415638, "eval_overall_precision": 0.7522804372408594, "eval_overall_recall": 0.741517094161437, "eval_runtime": 964.9468, "eval_samples_per_second": 68.087, "eval_steps_per_second": 0.266, "step": 8000 }, { "epoch": 0.4, "grad_norm": 12.217660903930664, "learning_rate": 1.8861595284474693e-05, "loss": 0.3388, "step": 9000 }, { "epoch": 0.4, "eval_LOC_f1": 0.7769920027192643, "eval_ORG_f1": 0.6685048661658352, "eval_PER_f1": 0.7955169761564663, "eval_loss": 0.36109644174575806, "eval_overall_accuracy": 0.8892544552878013, "eval_overall_f1": 0.7515097063573983, "eval_overall_precision": 0.763163393230255, "eval_overall_recall": 0.7402065755608547, "eval_runtime": 935.4215, "eval_samples_per_second": 70.236, "eval_steps_per_second": 0.275, "step": 9000 }, { "epoch": 0.44, "grad_norm": 6.038209438323975, "learning_rate": 1.8735105871638546e-05, "loss": 0.34, "step": 10000 }, { "epoch": 0.44, "eval_LOC_f1": 0.7909373544148547, "eval_ORG_f1": 0.6797622124579994, "eval_PER_f1": 0.8097494515182176, "eval_loss": 0.31574323773384094, "eval_overall_accuracy": 0.8950133905201693, "eval_overall_f1": 0.7636732236459436, "eval_overall_precision": 0.7691121860312153, "eval_overall_recall": 0.7583106469503421, "eval_runtime": 875.8957, "eval_samples_per_second": 75.009, "eval_steps_per_second": 0.293, "step": 10000 }, { "epoch": 0.49, "grad_norm": 4.172688961029053, "learning_rate": 1.86086164588024e-05, "loss": 0.3234, "step": 11000 }, { "epoch": 0.49, "eval_LOC_f1": 0.7809025636945843, "eval_ORG_f1": 0.6583455896967046, "eval_PER_f1": 0.8088906849868661, "eval_loss": 0.3503168821334839, "eval_overall_accuracy": 0.8873938178535211, "eval_overall_f1": 0.7490455281410953, "eval_overall_precision": 0.7389521011220446, "eval_overall_recall": 0.7594185080353705, "eval_runtime": 876.8783, "eval_samples_per_second": 74.925, "eval_steps_per_second": 0.293, "step": 11000 }, { "epoch": 0.53, "grad_norm": 0.31278446316719055, "learning_rate": 1.8482127045966253e-05, "loss": 0.3116, "step": 12000 }, { "epoch": 0.53, "eval_LOC_f1": 0.8015539728054758, "eval_ORG_f1": 0.6780179794520548, "eval_PER_f1": 0.807255456750202, "eval_loss": 0.3222993314266205, "eval_overall_accuracy": 0.8972015203740747, "eval_overall_f1": 0.7650194669062593, "eval_overall_precision": 0.7538710756376371, "eval_overall_recall": 0.7765025365965696, "eval_runtime": 876.0279, "eval_samples_per_second": 74.998, "eval_steps_per_second": 0.293, "step": 12000 }, { "epoch": 0.58, "grad_norm": 17.869319915771484, "learning_rate": 1.8355637633130106e-05, "loss": 0.3195, "step": 13000 }, { "epoch": 0.58, "eval_LOC_f1": 0.8077397954482631, "eval_ORG_f1": 0.6910689787013923, "eval_PER_f1": 0.8216717076180746, "eval_loss": 0.3207678496837616, "eval_overall_accuracy": 0.8985247667722903, "eval_overall_f1": 0.7773739255449474, "eval_overall_precision": 0.7767264634475317, "eval_overall_recall": 0.7780224679632244, "eval_runtime": 885.6262, "eval_samples_per_second": 74.185, "eval_steps_per_second": 0.29, "step": 13000 }, { "epoch": 0.62, "grad_norm": 31.75470733642578, "learning_rate": 1.8229148220293963e-05, "loss": 0.3038, "step": 14000 }, { "epoch": 0.62, "eval_LOC_f1": 0.7926177313116938, "eval_ORG_f1": 0.6917118970756915, "eval_PER_f1": 0.8275611161079727, "eval_loss": 0.3353281319141388, "eval_overall_accuracy": 0.8932463223486391, "eval_overall_f1": 0.775302594053191, "eval_overall_precision": 0.7831411656868759, "eval_overall_recall": 0.7676193821647875, "eval_runtime": 929.7993, "eval_samples_per_second": 70.66, "eval_steps_per_second": 0.276, "step": 14000 }, { "epoch": 0.66, "grad_norm": 4.61831521987915, "learning_rate": 1.8102658807457817e-05, "loss": 0.3108, "step": 15000 }, { "epoch": 0.66, "eval_LOC_f1": 0.8077224388355109, "eval_ORG_f1": 0.6909212129572154, "eval_PER_f1": 0.8192221560207918, "eval_loss": 0.3174687623977661, "eval_overall_accuracy": 0.8962873233880165, "eval_overall_f1": 0.7771138787987528, "eval_overall_precision": 0.7788884364820847, "eval_overall_recall": 0.7753473887579121, "eval_runtime": 989.2234, "eval_samples_per_second": 66.416, "eval_steps_per_second": 0.26, "step": 15000 }, { "epoch": 0.71, "grad_norm": 12.568930625915527, "learning_rate": 1.7976169394621674e-05, "loss": 0.3071, "step": 16000 }, { "epoch": 0.71, "eval_LOC_f1": 0.794363707776905, "eval_ORG_f1": 0.6988674651653153, "eval_PER_f1": 0.830271581375005, "eval_loss": 0.32856282591819763, "eval_overall_accuracy": 0.8977907538395009, "eval_overall_f1": 0.7767913544942637, "eval_overall_precision": 0.7810995225703338, "eval_overall_recall": 0.772530449291712, "eval_runtime": 943.0369, "eval_samples_per_second": 69.669, "eval_steps_per_second": 0.273, "step": 16000 }, { "epoch": 0.75, "grad_norm": 5.375293731689453, "learning_rate": 1.7849679981785527e-05, "loss": 0.294, "step": 17000 }, { "epoch": 0.75, "eval_LOC_f1": 0.8179866156103497, "eval_ORG_f1": 0.7139503216415458, "eval_PER_f1": 0.8319541693189051, "eval_loss": 0.3004244267940521, "eval_overall_accuracy": 0.9056726997766982, "eval_overall_f1": 0.7925003906754269, "eval_overall_precision": 0.7971147209370665, "eval_overall_recall": 0.7879391757243317, "eval_runtime": 922.5388, "eval_samples_per_second": 71.217, "eval_steps_per_second": 0.279, "step": 17000 }, { "epoch": 0.8, "grad_norm": 15.245515823364258, "learning_rate": 1.772319056894938e-05, "loss": 0.3022, "step": 18000 }, { "epoch": 0.8, "eval_LOC_f1": 0.8157123906017681, "eval_ORG_f1": 0.7098208686331968, "eval_PER_f1": 0.8456405034257072, "eval_loss": 0.30179163813591003, "eval_overall_accuracy": 0.9067740352882565, "eval_overall_f1": 0.7949277311259202, "eval_overall_precision": 0.7816879334439119, "eval_overall_recall": 0.808623752811873, "eval_runtime": 877.0088, "eval_samples_per_second": 74.914, "eval_steps_per_second": 0.293, "step": 18000 }, { "epoch": 0.84, "grad_norm": 27.571258544921875, "learning_rate": 1.7596701156113234e-05, "loss": 0.2932, "step": 19000 }, { "epoch": 0.84, "eval_LOC_f1": 0.812069319938867, "eval_ORG_f1": 0.711144363521586, "eval_PER_f1": 0.8502128757579667, "eval_loss": 0.2992981970310211, "eval_overall_accuracy": 0.9061335916182172, "eval_overall_f1": 0.7960065863825535, "eval_overall_precision": 0.7903741392400006, "eval_overall_recall": 0.8017198867820013, "eval_runtime": 877.5146, "eval_samples_per_second": 74.871, "eval_steps_per_second": 0.293, "step": 19000 }, { "epoch": 0.89, "grad_norm": 6.182469844818115, "learning_rate": 1.747021174327709e-05, "loss": 0.2943, "step": 20000 }, { "epoch": 0.89, "eval_LOC_f1": 0.8233705131530693, "eval_ORG_f1": 0.7053565992474702, "eval_PER_f1": 0.8430738573647348, "eval_loss": 0.28671061992645264, "eval_overall_accuracy": 0.9084127618358795, "eval_overall_f1": 0.7974687389257328, "eval_overall_precision": 0.7968528685705035, "eval_overall_recall": 0.7980855620030669, "eval_runtime": 876.9521, "eval_samples_per_second": 74.919, "eval_steps_per_second": 0.293, "step": 20000 }, { "epoch": 0.93, "grad_norm": 37.97357177734375, "learning_rate": 1.7343722330440944e-05, "loss": 0.2782, "step": 21000 }, { "epoch": 0.93, "eval_LOC_f1": 0.8330484744899815, "eval_ORG_f1": 0.7210407632263661, "eval_PER_f1": 0.8489616418275103, "eval_loss": 0.30233901739120483, "eval_overall_accuracy": 0.906348548032643, "eval_overall_f1": 0.8060817413887736, "eval_overall_precision": 0.7987835211557213, "eval_overall_recall": 0.8135145541872421, "eval_runtime": 903.2924, "eval_samples_per_second": 72.734, "eval_steps_per_second": 0.285, "step": 21000 }, { "epoch": 0.97, "grad_norm": 3.23641037940979, "learning_rate": 1.7217232917604798e-05, "loss": 0.2913, "step": 22000 }, { "epoch": 0.97, "eval_LOC_f1": 0.8284559448131656, "eval_ORG_f1": 0.7247506440319704, "eval_PER_f1": 0.8467821489168608, "eval_loss": 0.2910088300704956, "eval_overall_accuracy": 0.908466500939486, "eval_overall_f1": 0.8031924311719724, "eval_overall_precision": 0.7945208195637806, "eval_overall_recall": 0.8120554200752534, "eval_runtime": 930.8988, "eval_samples_per_second": 70.577, "eval_steps_per_second": 0.276, "step": 22000 }, { "epoch": 1.02, "grad_norm": 51.232643127441406, "learning_rate": 1.709074350476865e-05, "loss": 0.254, "step": 23000 }, { "epoch": 1.02, "eval_LOC_f1": 0.8326820729529502, "eval_ORG_f1": 0.7281753443144438, "eval_PER_f1": 0.8538723624698639, "eval_loss": 0.3031412661075592, "eval_overall_accuracy": 0.9094907050317503, "eval_overall_f1": 0.8106029688042823, "eval_overall_precision": 0.8094644661502189, "eval_overall_recall": 0.8117446785514041, "eval_runtime": 982.2729, "eval_samples_per_second": 66.886, "eval_steps_per_second": 0.262, "step": 23000 }, { "epoch": 1.06, "grad_norm": 3.0182816982269287, "learning_rate": 1.6964254091932504e-05, "loss": 0.2412, "step": 24000 }, { "epoch": 1.06, "eval_LOC_f1": 0.8337292382788848, "eval_ORG_f1": 0.7265721539463927, "eval_PER_f1": 0.8438998504510589, "eval_loss": 0.2959749400615692, "eval_overall_accuracy": 0.9087023207706061, "eval_overall_f1": 0.8054477820887165, "eval_overall_precision": 0.7949914132873621, "eval_overall_recall": 0.8161828781420359, "eval_runtime": 937.946, "eval_samples_per_second": 70.047, "eval_steps_per_second": 0.274, "step": 24000 }, { "epoch": 1.11, "grad_norm": 4.651257038116455, "learning_rate": 1.683776467909636e-05, "loss": 0.2248, "step": 25000 }, { "epoch": 1.11, "eval_LOC_f1": 0.8204892221350009, "eval_ORG_f1": 0.7164497497985908, "eval_PER_f1": 0.8366153573083787, "eval_loss": 0.2870059013366699, "eval_overall_accuracy": 0.9095747909232758, "eval_overall_f1": 0.7954083144399056, "eval_overall_precision": 0.7841744564646571, "eval_overall_recall": 0.806968716434849, "eval_runtime": 880.5612, "eval_samples_per_second": 74.612, "eval_steps_per_second": 0.292, "step": 25000 }, { "epoch": 1.15, "grad_norm": 0.5376187562942505, "learning_rate": 1.6711275266260215e-05, "loss": 0.2367, "step": 26000 }, { "epoch": 1.15, "eval_LOC_f1": 0.8274943290245922, "eval_ORG_f1": 0.7181801646127961, "eval_PER_f1": 0.8393770384236834, "eval_loss": 0.3035840094089508, "eval_overall_accuracy": 0.9099459068505344, "eval_overall_f1": 0.7997951699758828, "eval_overall_precision": 0.7826674511984585, "eval_overall_recall": 0.8176892990076537, "eval_runtime": 877.1859, "eval_samples_per_second": 74.899, "eval_steps_per_second": 0.293, "step": 26000 }, { "epoch": 1.2, "grad_norm": 29.850025177001953, "learning_rate": 1.658478585342407e-05, "loss": 0.2259, "step": 27000 }, { "epoch": 1.2, "eval_LOC_f1": 0.8368737846096861, "eval_ORG_f1": 0.730566338210411, "eval_PER_f1": 0.8548625950405009, "eval_loss": 0.2981078624725342, "eval_overall_accuracy": 0.912430550111398, "eval_overall_f1": 0.8107802613802052, "eval_overall_precision": 0.8028279082088811, "eval_overall_recall": 0.8188917335999406, "eval_runtime": 900.9915, "eval_samples_per_second": 72.92, "eval_steps_per_second": 0.285, "step": 27000 }, { "epoch": 1.24, "grad_norm": 6.522253036499023, "learning_rate": 1.6458296440587925e-05, "loss": 0.2353, "step": 28000 }, { "epoch": 1.24, "eval_LOC_f1": 0.8361146769362431, "eval_ORG_f1": 0.7397288818401768, "eval_PER_f1": 0.851691836373991, "eval_loss": 0.2890784442424774, "eval_overall_accuracy": 0.9118988490980682, "eval_overall_f1": 0.813503140265178, "eval_overall_precision": 0.8080863288253761, "eval_overall_recall": 0.8189930623577175, "eval_runtime": 898.5924, "eval_samples_per_second": 73.114, "eval_steps_per_second": 0.286, "step": 28000 }, { "epoch": 1.28, "grad_norm": 18.48634910583496, "learning_rate": 1.633180702775178e-05, "loss": 0.231, "step": 29000 }, { "epoch": 1.28, "eval_LOC_f1": 0.8399307496708805, "eval_ORG_f1": 0.7353511607405231, "eval_PER_f1": 0.8429051875514639, "eval_loss": 0.29812344908714294, "eval_overall_accuracy": 0.9100799384971765, "eval_overall_f1": 0.8109618028412001, "eval_overall_precision": 0.8090263071203351, "eval_overall_recall": 0.8129065816405802, "eval_runtime": 911.3101, "eval_samples_per_second": 72.094, "eval_steps_per_second": 0.282, "step": 29000 }, { "epoch": 1.33, "grad_norm": 6.476167678833008, "learning_rate": 1.6205317614915632e-05, "loss": 0.2298, "step": 30000 }, { "epoch": 1.33, "eval_LOC_f1": 0.8437642148074813, "eval_ORG_f1": 0.741920341727885, "eval_PER_f1": 0.845791168353266, "eval_loss": 0.2789755165576935, "eval_overall_accuracy": 0.9128231616800994, "eval_overall_f1": 0.8156984934617233, "eval_overall_precision": 0.8007835888891781, "eval_overall_recall": 0.8311795342930293, "eval_runtime": 950.013, "eval_samples_per_second": 69.157, "eval_steps_per_second": 0.271, "step": 30000 }, { "epoch": 1.37, "grad_norm": 0.6922666430473328, "learning_rate": 1.607882820207949e-05, "loss": 0.2236, "step": 31000 }, { "epoch": 1.37, "eval_LOC_f1": 0.8401312445122232, "eval_ORG_f1": 0.7412946847115139, "eval_PER_f1": 0.8599101069965396, "eval_loss": 0.2861514985561371, "eval_overall_accuracy": 0.9133093425115507, "eval_overall_f1": 0.8168471254617229, "eval_overall_precision": 0.8077931685921699, "eval_overall_recall": 0.8261063411536617, "eval_runtime": 964.6377, "eval_samples_per_second": 68.108, "eval_steps_per_second": 0.266, "step": 31000 }, { "epoch": 1.42, "grad_norm": 10.913984298706055, "learning_rate": 1.5952338789243342e-05, "loss": 0.2164, "step": 32000 }, { "epoch": 1.42, "eval_LOC_f1": 0.8450018789928598, "eval_ORG_f1": 0.7453389102160086, "eval_PER_f1": 0.8475419561015748, "eval_loss": 0.29202836751937866, "eval_overall_accuracy": 0.9133548626934291, "eval_overall_f1": 0.8167286457267982, "eval_overall_precision": 0.8108745156006552, "eval_overall_recall": 0.8226679186397627, "eval_runtime": 934.7918, "eval_samples_per_second": 70.283, "eval_steps_per_second": 0.275, "step": 32000 }, { "epoch": 1.46, "grad_norm": 8.604541778564453, "learning_rate": 1.5825849376407196e-05, "loss": 0.2343, "step": 33000 }, { "epoch": 1.46, "eval_LOC_f1": 0.8327052539148251, "eval_ORG_f1": 0.7464142820374833, "eval_PER_f1": 0.8571568569804591, "eval_loss": 0.26980945467948914, "eval_overall_accuracy": 0.915141529832157, "eval_overall_f1": 0.8162628685387808, "eval_overall_precision": 0.8141182004502234, "eval_overall_recall": 0.818418866063648, "eval_runtime": 878.6904, "eval_samples_per_second": 74.77, "eval_steps_per_second": 0.292, "step": 33000 }, { "epoch": 1.51, "grad_norm": 20.011140823364258, "learning_rate": 1.569935996357105e-05, "loss": 0.2305, "step": 34000 }, { "epoch": 1.51, "eval_LOC_f1": 0.8434370154154885, "eval_ORG_f1": 0.7450794786844748, "eval_PER_f1": 0.8598302131901996, "eval_loss": 0.2736206650733948, "eval_overall_accuracy": 0.9164160949247526, "eval_overall_f1": 0.820116525352046, "eval_overall_precision": 0.8215285544822911, "eval_overall_recall": 0.818709341835942, "eval_runtime": 878.0774, "eval_samples_per_second": 74.823, "eval_steps_per_second": 0.293, "step": 34000 }, { "epoch": 1.55, "grad_norm": 0.8893330693244934, "learning_rate": 1.5572870550734906e-05, "loss": 0.218, "step": 35000 }, { "epoch": 1.55, "eval_LOC_f1": 0.8372996858861737, "eval_ORG_f1": 0.7351363688234623, "eval_PER_f1": 0.8452893909397927, "eval_loss": 0.277670681476593, "eval_overall_accuracy": 0.9132897435443531, "eval_overall_f1": 0.8105032765054125, "eval_overall_precision": 0.7914432306117588, "eval_overall_recall": 0.8305040092411827, "eval_runtime": 878.1965, "eval_samples_per_second": 74.812, "eval_steps_per_second": 0.293, "step": 35000 }, { "epoch": 1.59, "grad_norm": 2.807310104370117, "learning_rate": 1.544638113789876e-05, "loss": 0.2209, "step": 36000 }, { "epoch": 1.59, "eval_LOC_f1": 0.8436019819082686, "eval_ORG_f1": 0.7541017701160051, "eval_PER_f1": 0.8559255699664113, "eval_loss": 0.2975883483886719, "eval_overall_accuracy": 0.9155777649084917, "eval_overall_f1": 0.8217433690792348, "eval_overall_precision": 0.8178831213153369, "eval_overall_recall": 0.8256402288678876, "eval_runtime": 887.7886, "eval_samples_per_second": 74.004, "eval_steps_per_second": 0.289, "step": 36000 }, { "epoch": 1.64, "grad_norm": 0.7378529906272888, "learning_rate": 1.5319891725062616e-05, "loss": 0.2068, "step": 37000 }, { "epoch": 1.64, "eval_LOC_f1": 0.8404596277816221, "eval_ORG_f1": 0.7519756060658962, "eval_PER_f1": 0.8650594959056045, "eval_loss": 0.2906901240348816, "eval_overall_accuracy": 0.9159109473508519, "eval_overall_f1": 0.8231073274551537, "eval_overall_precision": 0.8232630746670091, "eval_overall_recall": 0.8229516391615384, "eval_runtime": 914.9939, "eval_samples_per_second": 71.804, "eval_steps_per_second": 0.281, "step": 37000 }, { "epoch": 1.68, "grad_norm": 4.775814056396484, "learning_rate": 1.5193402312226468e-05, "loss": 0.2222, "step": 38000 }, { "epoch": 1.68, "eval_LOC_f1": 0.8486954241510423, "eval_ORG_f1": 0.7403596163509645, "eval_PER_f1": 0.8627163820626227, "eval_loss": 0.2920599579811096, "eval_overall_accuracy": 0.9144530370812459, "eval_overall_f1": 0.8204945751023299, "eval_overall_precision": 0.8079300635190885, "eval_overall_recall": 0.8334560537177521, "eval_runtime": 974.0136, "eval_samples_per_second": 67.453, "eval_steps_per_second": 0.264, "step": 38000 }, { "epoch": 1.73, "grad_norm": 2.2256317138671875, "learning_rate": 1.5066912899390323e-05, "loss": 0.2328, "step": 39000 }, { "epoch": 1.73, "eval_LOC_f1": 0.849334397801749, "eval_ORG_f1": 0.753236617390506, "eval_PER_f1": 0.8693573280340553, "eval_loss": 0.29489845037460327, "eval_overall_accuracy": 0.917294255100157, "eval_overall_f1": 0.8276056778793333, "eval_overall_precision": 0.8335114765330592, "eval_overall_recall": 0.8217829808218438, "eval_runtime": 944.9062, "eval_samples_per_second": 69.531, "eval_steps_per_second": 0.272, "step": 39000 }, { "epoch": 1.77, "grad_norm": 16.9512996673584, "learning_rate": 1.4940423486554176e-05, "loss": 0.2229, "step": 40000 }, { "epoch": 1.77, "eval_LOC_f1": 0.8445889009269291, "eval_ORG_f1": 0.7452847675981278, "eval_PER_f1": 0.8508496270046708, "eval_loss": 0.27226653695106506, "eval_overall_accuracy": 0.916396495957555, "eval_overall_f1": 0.818113712374582, "eval_overall_precision": 0.8101638106341121, "eval_overall_recall": 0.8262211804124756, "eval_runtime": 919.492, "eval_samples_per_second": 71.452, "eval_steps_per_second": 0.28, "step": 40000 }, { "epoch": 1.82, "grad_norm": 0.6652330160140991, "learning_rate": 1.481393407371803e-05, "loss": 0.2219, "step": 41000 }, { "epoch": 1.82, "eval_LOC_f1": 0.8483445744353834, "eval_ORG_f1": 0.7490041659533185, "eval_PER_f1": 0.865145374272123, "eval_loss": 0.2795349359512329, "eval_overall_accuracy": 0.9183601860258099, "eval_overall_f1": 0.8247159453593771, "eval_overall_precision": 0.8203393908609086, "eval_overall_recall": 0.8291394486364527, "eval_runtime": 878.0472, "eval_samples_per_second": 74.825, "eval_steps_per_second": 0.293, "step": 41000 }, { "epoch": 1.86, "grad_norm": 8.537057876586914, "learning_rate": 1.4687444660881885e-05, "loss": 0.2265, "step": 42000 }, { "epoch": 1.86, "eval_LOC_f1": 0.8431806420528559, "eval_ORG_f1": 0.7481651106805237, "eval_PER_f1": 0.8624314888139032, "eval_loss": 0.2695271372795105, "eval_overall_accuracy": 0.9176305986662586, "eval_overall_f1": 0.8227747180347664, "eval_overall_precision": 0.8127298369558054, "eval_overall_recall": 0.8330710044381996, "eval_runtime": 878.4759, "eval_samples_per_second": 74.789, "eval_steps_per_second": 0.293, "step": 42000 }, { "epoch": 1.9, "grad_norm": 2.5510284900665283, "learning_rate": 1.456095524804574e-05, "loss": 0.208, "step": 43000 }, { "epoch": 1.9, "eval_LOC_f1": 0.8455445895423215, "eval_ORG_f1": 0.7595170903640558, "eval_PER_f1": 0.8683249226114047, "eval_loss": 0.291418194770813, "eval_overall_accuracy": 0.9158768072144431, "eval_overall_f1": 0.828613510075651, "eval_overall_precision": 0.8212624496539643, "eval_overall_recall": 0.8360973566704721, "eval_runtime": 879.88, "eval_samples_per_second": 74.669, "eval_steps_per_second": 0.292, "step": 43000 }, { "epoch": 1.95, "grad_norm": 40.04784393310547, "learning_rate": 1.4434465835209595e-05, "loss": 0.2201, "step": 44000 }, { "epoch": 1.95, "eval_LOC_f1": 0.8562697361719467, "eval_ORG_f1": 0.7598152424942263, "eval_PER_f1": 0.8635555196248839, "eval_loss": 0.26673147082328796, "eval_overall_accuracy": 0.9210597857010994, "eval_overall_f1": 0.8302701818813599, "eval_overall_precision": 0.8374682152429387, "eval_overall_recall": 0.8231948281802031, "eval_runtime": 908.7842, "eval_samples_per_second": 72.294, "eval_steps_per_second": 0.283, "step": 44000 }, { "epoch": 1.99, "grad_norm": 4.13014030456543, "learning_rate": 1.4307976422373449e-05, "loss": 0.2065, "step": 45000 }, { "epoch": 1.99, "eval_LOC_f1": 0.850374667633551, "eval_ORG_f1": 0.7572422253856926, "eval_PER_f1": 0.8662861965717501, "eval_loss": 0.260405570268631, "eval_overall_accuracy": 0.9184101317809266, "eval_overall_f1": 0.8283524878105291, "eval_overall_precision": 0.835684527492472, "eval_overall_recall": 0.821147987273108, "eval_runtime": 930.7338, "eval_samples_per_second": 70.589, "eval_steps_per_second": 0.276, "step": 45000 } ], "logging_steps": 1000, "max_steps": 158116, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "total_flos": 1.4980271080173432e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }