|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 1215, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_B": { |
|
"f1-score": 0.8087666523420713, |
|
"precision": 0.8189730200174065, |
|
"recall": 0.7988115449915111, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9374597034171502, |
|
"precision": 0.9521418826739427, |
|
"recall": 0.9232234509762421, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.8881362400419788, |
|
"precision": 0.863303347862376, |
|
"recall": 0.9144400785854617, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9154245298608586, |
|
"eval_loss": 0.2228638380765915, |
|
"eval_macro avg": { |
|
"f1-score": 0.8781208652670669, |
|
"precision": 0.8781394168512416, |
|
"recall": 0.8788250248510717, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7167, |
|
"eval_samples_per_second": 17.173, |
|
"eval_steps_per_second": 2.332, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9158543469268937, |
|
"precision": 0.9170673807540184, |
|
"recall": 0.9154245298608586, |
|
"support": 30257.0 |
|
}, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_B": { |
|
"f1-score": 0.8401515151515152, |
|
"precision": 0.7585499316005472, |
|
"recall": 0.9414261460101867, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9568235603470944, |
|
"precision": 0.9510219016256338, |
|
"recall": 0.9626964389650247, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9185648054827655, |
|
"precision": 0.9430877483443708, |
|
"recall": 0.8952848722986247, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9391876260038999, |
|
"eval_loss": 0.16692574322223663, |
|
"eval_macro avg": { |
|
"f1-score": 0.905179960327125, |
|
"precision": 0.8842198605235172, |
|
"recall": 0.9331358190912787, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7116, |
|
"eval_samples_per_second": 17.192, |
|
"eval_steps_per_second": 2.335, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9394089523635119, |
|
"precision": 0.9408589092241132, |
|
"recall": 0.9391876260038999, |
|
"support": 30257.0 |
|
}, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_B": { |
|
"f1-score": 0.8612244897959184, |
|
"precision": 0.8294025157232704, |
|
"recall": 0.8955857385398981, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.951106371634231, |
|
"precision": 0.9584654236741712, |
|
"recall": 0.9438594634636753, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9106743212999903, |
|
"precision": 0.9021592442645074, |
|
"recall": 0.9193516699410609, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9337343424662061, |
|
"eval_loss": 0.1742253601551056, |
|
"eval_macro avg": { |
|
"f1-score": 0.9076683942433799, |
|
"precision": 0.8966757278873163, |
|
"recall": 0.9195989573148781, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7094, |
|
"eval_samples_per_second": 17.2, |
|
"eval_steps_per_second": 2.336, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9340035812317422, |
|
"precision": 0.9344963252190521, |
|
"recall": 0.9337343424662061, |
|
"support": 30257.0 |
|
}, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_B": { |
|
"f1-score": 0.8717948717948719, |
|
"precision": 0.8254931714719271, |
|
"recall": 0.9235993208828522, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9561271586371415, |
|
"precision": 0.9374586871408959, |
|
"recall": 0.9755542621302714, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9117828500925355, |
|
"precision": 0.956427955133736, |
|
"recall": 0.8711198428290766, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9383944211256899, |
|
"eval_loss": 0.18792153894901276, |
|
"eval_macro avg": { |
|
"f1-score": 0.9132349601748496, |
|
"precision": 0.9064599379155197, |
|
"recall": 0.9234244752807333, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7342, |
|
"eval_samples_per_second": 17.11, |
|
"eval_steps_per_second": 2.324, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9379241479327001, |
|
"precision": 0.9394817486046585, |
|
"recall": 0.9383944211256899, |
|
"support": 30257.0 |
|
}, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_B": { |
|
"f1-score": 0.8742949234488315, |
|
"precision": 0.8320552147239264, |
|
"recall": 0.9210526315789473, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9583957528450158, |
|
"precision": 0.9521148825065274, |
|
"recall": 0.9647600402137679, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9198818996146724, |
|
"precision": 0.9375701315923697, |
|
"recall": 0.9028487229862475, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9422282447037049, |
|
"eval_loss": 0.19990424811840057, |
|
"eval_macro avg": { |
|
"f1-score": 0.9175241919695066, |
|
"precision": 0.9072467429409411, |
|
"recall": 0.9295537982596542, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7101, |
|
"eval_samples_per_second": 17.197, |
|
"eval_steps_per_second": 2.335, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9421634164298524, |
|
"precision": 0.9425469857238316, |
|
"recall": 0.9422282447037049, |
|
"support": 30257.0 |
|
}, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_B": { |
|
"f1-score": 0.8808247422680412, |
|
"precision": 0.8564554931836408, |
|
"recall": 0.9066213921901528, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.957406827435014, |
|
"precision": 0.9446361435855178, |
|
"recall": 0.9705275411397428, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9163000050574014, |
|
"precision": 0.9443344105076619, |
|
"recall": 0.8898821218074656, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9409062365733549, |
|
"eval_loss": 0.20615626871585846, |
|
"eval_macro avg": { |
|
"f1-score": 0.9181771915868189, |
|
"precision": 0.9151420157589403, |
|
"recall": 0.922343685045787, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.6949, |
|
"eval_samples_per_second": 17.253, |
|
"eval_steps_per_second": 2.343, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9405948120953971, |
|
"precision": 0.941101475610967, |
|
"recall": 0.9409062365733549, |
|
"support": 30257.0 |
|
}, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"grad_norm": 2.4104158878326416, |
|
"learning_rate": 1.7530864197530865e-05, |
|
"loss": 0.1662, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_B": { |
|
"f1-score": 0.8836820083682009, |
|
"precision": 0.8712871287128713, |
|
"recall": 0.8964346349745331, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9614174269469787, |
|
"precision": 0.9609854091774159, |
|
"recall": 0.9618498333245146, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9271751440248166, |
|
"precision": 0.9295093296475466, |
|
"recall": 0.924852652259332, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9468552731599299, |
|
"eval_loss": 0.22925782203674316, |
|
"eval_macro avg": { |
|
"f1-score": 0.9240915264466655, |
|
"precision": 0.9205939558459445, |
|
"recall": 0.9277123735194599, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7288, |
|
"eval_samples_per_second": 17.129, |
|
"eval_steps_per_second": 2.326, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9468700903559945, |
|
"precision": 0.9469030129054358, |
|
"recall": 0.9468552731599299, |
|
"support": 30257.0 |
|
}, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_B": { |
|
"f1-score": 0.8803630363036303, |
|
"precision": 0.8563402889245586, |
|
"recall": 0.9057724957555179, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9580651967507121, |
|
"precision": 0.9550928116947994, |
|
"recall": 0.9610561405365363, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9208882720333101, |
|
"precision": 0.9294576746047628, |
|
"recall": 0.912475442043222, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9425587467362925, |
|
"eval_loss": 0.22973769903182983, |
|
"eval_macro avg": { |
|
"f1-score": 0.9197721683625509, |
|
"precision": 0.9136302584080402, |
|
"recall": 0.9264346927784253, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7191, |
|
"eval_samples_per_second": 17.164, |
|
"eval_steps_per_second": 2.331, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9425317916335554, |
|
"precision": 0.942623096673485, |
|
"recall": 0.9425587467362925, |
|
"support": 30257.0 |
|
}, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_B": { |
|
"f1-score": 0.8885191347753745, |
|
"precision": 0.8711256117455138, |
|
"recall": 0.9066213921901528, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9620326487625066, |
|
"precision": 0.9574445783763954, |
|
"recall": 0.9666649029049156, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9275707898658718, |
|
"precision": 0.9382914572864322, |
|
"recall": 0.9170923379174852, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9476484780381399, |
|
"eval_loss": 0.24401217699050903, |
|
"eval_macro avg": { |
|
"f1-score": 0.9260408578012509, |
|
"precision": 0.9222872158027805, |
|
"recall": 0.9301262110041845, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.701, |
|
"eval_samples_per_second": 17.23, |
|
"eval_steps_per_second": 2.34, |
|
"eval_weighted avg": { |
|
"f1-score": 0.94757580760031, |
|
"precision": 0.9476398219436029, |
|
"recall": 0.9476484780381399, |
|
"support": 30257.0 |
|
}, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_B": { |
|
"f1-score": 0.875959595959596, |
|
"precision": 0.8357748650732459, |
|
"recall": 0.9202037351443124, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.955652945166171, |
|
"precision": 0.9424749163879599, |
|
"recall": 0.9692047198264458, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9127632580563309, |
|
"precision": 0.9441469816272966, |
|
"recall": 0.8833988212180747, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9384274713289487, |
|
"eval_loss": 0.29471203684806824, |
|
"eval_macro avg": { |
|
"f1-score": 0.9147919330606994, |
|
"precision": 0.9074655876961675, |
|
"recall": 0.9242690920629443, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.6985, |
|
"eval_samples_per_second": 17.24, |
|
"eval_steps_per_second": 2.341, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9381199518045186, |
|
"precision": 0.9388833165494999, |
|
"recall": 0.9384274713289487, |
|
"support": 30257.0 |
|
}, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_B": { |
|
"f1-score": 0.875515251442704, |
|
"precision": 0.8509615384615384, |
|
"recall": 0.9015280135823429, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9575287538250502, |
|
"precision": 0.9547582723972855, |
|
"recall": 0.96031536060109, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9195242814667988, |
|
"precision": 0.9278, |
|
"recall": 0.9113948919449901, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9415672406385299, |
|
"eval_loss": 0.30941879749298096, |
|
"eval_macro avg": { |
|
"f1-score": 0.9175227622448511, |
|
"precision": 0.9111732702862746, |
|
"recall": 0.9244127553761411, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7017, |
|
"eval_samples_per_second": 17.228, |
|
"eval_steps_per_second": 2.34, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9415490653426031, |
|
"precision": 0.9416470001105195, |
|
"recall": 0.9415672406385299, |
|
"support": 30257.0 |
|
}, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_B": { |
|
"f1-score": 0.8818181818181817, |
|
"precision": 0.8590982286634461, |
|
"recall": 0.9057724957555179, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9542487087386278, |
|
"precision": 0.9457436856875585, |
|
"recall": 0.9629080903751521, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.910740239562973, |
|
"precision": 0.9297042873222142, |
|
"recall": 0.8925343811394892, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9370063125888224, |
|
"eval_loss": 0.28610122203826904, |
|
"eval_macro avg": { |
|
"f1-score": 0.9156023767065942, |
|
"precision": 0.9115154005577396, |
|
"recall": 0.9204049890900531, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.6911, |
|
"eval_samples_per_second": 17.267, |
|
"eval_steps_per_second": 2.345, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9367902899621314, |
|
"precision": 0.9369738332985704, |
|
"recall": 0.9370063125888224, |
|
"support": 30257.0 |
|
}, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"grad_norm": 0.9797911643981934, |
|
"learning_rate": 1.506172839506173e-05, |
|
"loss": 0.0233, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_B": { |
|
"f1-score": 0.8875154511742892, |
|
"precision": 0.8622898318654924, |
|
"recall": 0.9142614601018676, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9602009576889865, |
|
"precision": 0.9497877627083549, |
|
"recall": 0.9708450182549341, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9229994967287367, |
|
"precision": 0.9463364293085655, |
|
"recall": 0.9007858546168959, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9450705621839575, |
|
"eval_loss": 0.3311145305633545, |
|
"eval_macro avg": { |
|
"f1-score": 0.9235719685306708, |
|
"precision": 0.9194713412941375, |
|
"recall": 0.9286307776578991, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7187, |
|
"eval_samples_per_second": 17.166, |
|
"eval_steps_per_second": 2.331, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9448546114137558, |
|
"precision": 0.9452199887538072, |
|
"recall": 0.9450705621839575, |
|
"support": 30257.0 |
|
}, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_B": { |
|
"f1-score": 0.8758762886597938, |
|
"precision": 0.8516439454691259, |
|
"recall": 0.9015280135823429, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9588910891089109, |
|
"precision": 0.9569456155143339, |
|
"recall": 0.9608444891264089, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9223310576827941, |
|
"precision": 0.9290412597169623, |
|
"recall": 0.9157170923379175, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9433519516145025, |
|
"eval_loss": 0.32673054933547974, |
|
"eval_macro avg": { |
|
"f1-score": 0.9190328118171663, |
|
"precision": 0.9125436069001407, |
|
"recall": 0.9260298650155564, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.7171, |
|
"eval_samples_per_second": 17.171, |
|
"eval_steps_per_second": 2.332, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9433584006451857, |
|
"precision": 0.943457440568685, |
|
"recall": 0.9433519516145025, |
|
"support": 30257.0 |
|
}, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_B": { |
|
"f1-score": 0.8844750709363599, |
|
"precision": 0.8463925523661753, |
|
"recall": 0.9261460101867572, |
|
"support": 1178.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9554282158321882, |
|
"precision": 0.9367500508440105, |
|
"recall": 0.974866395047357, |
|
"support": 18899.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9105749486652978, |
|
"precision": 0.9536559139784946, |
|
"recall": 0.8712180746561886, |
|
"support": 10180.0 |
|
}, |
|
"eval_accuracy": 0.9380969692963612, |
|
"eval_loss": 0.4276476800441742, |
|
"eval_macro avg": { |
|
"f1-score": 0.9168260784779486, |
|
"precision": 0.9122661723962269, |
|
"recall": 0.924076826630101, |
|
"support": 30257.0 |
|
}, |
|
"eval_runtime": 4.6973, |
|
"eval_samples_per_second": 17.244, |
|
"eval_steps_per_second": 2.342, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9375748574540862, |
|
"precision": 0.9389201454833387, |
|
"recall": 0.9380969692963612, |
|
"support": 30257.0 |
|
}, |
|
"step": 1215 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2150101655037000.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|