policy-distilbert-7d / trainer_state.json
MoritzLaurer's picture
MoritzLaurer HF staff
from Google Colab
86c1bc3
raw history blame
No virus
42.7 kB
{
"best_metric": 0.7449912424577604,
"best_model_checkpoint": "./results/classi_distilbert/checkpoint-137775",
"epoch": 5.0,
"global_step": 137775,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018145527127563055,
"learning_rate": 4e-05,
"loss": 0.84686328125,
"step": 500
},
{
"epoch": 0.03629105425512611,
"learning_rate": 3.9854307047896564e-05,
"loss": 0.7841121826171875,
"step": 1000
},
{
"epoch": 0.05443658138268917,
"learning_rate": 3.970861409579312e-05,
"loss": 0.7334760131835938,
"step": 1500
},
{
"epoch": 0.07258210851025222,
"learning_rate": 3.956292114368968e-05,
"loss": 0.6972730712890625,
"step": 2000
},
{
"epoch": 0.09072763563781527,
"learning_rate": 3.941722819158623e-05,
"loss": 0.7338438720703125,
"step": 2500
},
{
"epoch": 0.10887316276537834,
"learning_rate": 3.927153523948279e-05,
"loss": 0.7204794921875,
"step": 3000
},
{
"epoch": 0.12701868989294138,
"learning_rate": 3.912584228737935e-05,
"loss": 0.654921630859375,
"step": 3500
},
{
"epoch": 0.14516421702050444,
"learning_rate": 3.898014933527591e-05,
"loss": 0.6844443359375,
"step": 4000
},
{
"epoch": 0.1633097441480675,
"learning_rate": 3.8834456383172466e-05,
"loss": 0.7079535522460938,
"step": 4500
},
{
"epoch": 0.18145527127563055,
"learning_rate": 3.8688763431069026e-05,
"loss": 0.6477606201171875,
"step": 5000
},
{
"epoch": 0.1996007984031936,
"learning_rate": 3.8543070478965586e-05,
"loss": 0.6153006591796875,
"step": 5500
},
{
"epoch": 0.21774632553075668,
"learning_rate": 3.839737752686214e-05,
"loss": 0.6489894409179687,
"step": 6000
},
{
"epoch": 0.23589185265831972,
"learning_rate": 3.82516845747587e-05,
"loss": 0.678625244140625,
"step": 6500
},
{
"epoch": 0.25403737978588276,
"learning_rate": 3.8105991622655254e-05,
"loss": 0.65478857421875,
"step": 7000
},
{
"epoch": 0.2721829069134458,
"learning_rate": 3.7960298670551814e-05,
"loss": 0.63389208984375,
"step": 7500
},
{
"epoch": 0.2903284340410089,
"learning_rate": 3.7814605718448375e-05,
"loss": 0.5931556396484375,
"step": 8000
},
{
"epoch": 0.30847396116857195,
"learning_rate": 3.7668912766344935e-05,
"loss": 0.5931373291015625,
"step": 8500
},
{
"epoch": 0.326619488296135,
"learning_rate": 3.752321981424149e-05,
"loss": 0.570529296875,
"step": 9000
},
{
"epoch": 0.3447650154236981,
"learning_rate": 3.737752686213805e-05,
"loss": 1.0059749145507813,
"step": 9500
},
{
"epoch": 0.3629105425512611,
"learning_rate": 3.723183391003461e-05,
"loss": 0.8932774658203125,
"step": 10000
},
{
"epoch": 0.38105606967882416,
"learning_rate": 3.708614095793116e-05,
"loss": 0.9196145629882813,
"step": 10500
},
{
"epoch": 0.3992015968063872,
"learning_rate": 3.694044800582772e-05,
"loss": 0.8962069702148437,
"step": 11000
},
{
"epoch": 0.4173471239339503,
"learning_rate": 3.679475505372428e-05,
"loss": 0.899446044921875,
"step": 11500
},
{
"epoch": 0.43549265106151336,
"learning_rate": 3.664906210162084e-05,
"loss": 0.8438430786132812,
"step": 12000
},
{
"epoch": 0.45363817818907637,
"learning_rate": 3.650336914951739e-05,
"loss": 0.8932066040039063,
"step": 12500
},
{
"epoch": 0.47178370531663943,
"learning_rate": 3.635767619741395e-05,
"loss": 0.8947886962890625,
"step": 13000
},
{
"epoch": 0.4899292324442025,
"learning_rate": 3.621198324531051e-05,
"loss": 0.8659691772460938,
"step": 13500
},
{
"epoch": 0.5080747595717655,
"learning_rate": 3.606629029320707e-05,
"loss": 0.8590133666992188,
"step": 14000
},
{
"epoch": 0.5262202866993286,
"learning_rate": 3.592059734110363e-05,
"loss": 0.826283447265625,
"step": 14500
},
{
"epoch": 0.5443658138268916,
"learning_rate": 3.5774904389000186e-05,
"loss": 0.8533541870117187,
"step": 15000
},
{
"epoch": 0.5625113409544548,
"learning_rate": 3.5629211436896746e-05,
"loss": 0.8348355102539062,
"step": 15500
},
{
"epoch": 0.5806568680820178,
"learning_rate": 3.54835184847933e-05,
"loss": 0.837393310546875,
"step": 16000
},
{
"epoch": 0.5988023952095808,
"learning_rate": 3.533782553268986e-05,
"loss": 0.8255460205078125,
"step": 16500
},
{
"epoch": 0.6169479223371439,
"learning_rate": 3.519213258058641e-05,
"loss": 0.850743896484375,
"step": 17000
},
{
"epoch": 0.6350934494647069,
"learning_rate": 3.5046439628482974e-05,
"loss": 0.822864501953125,
"step": 17500
},
{
"epoch": 0.65323897659227,
"learning_rate": 3.4900746676379534e-05,
"loss": 0.848374267578125,
"step": 18000
},
{
"epoch": 0.671384503719833,
"learning_rate": 3.4755053724276094e-05,
"loss": 0.8317371826171875,
"step": 18500
},
{
"epoch": 0.6895300308473962,
"learning_rate": 3.460936077217265e-05,
"loss": 0.8403475341796875,
"step": 19000
},
{
"epoch": 0.7076755579749592,
"learning_rate": 3.446366782006921e-05,
"loss": 0.8143458862304688,
"step": 19500
},
{
"epoch": 0.7258210851025222,
"learning_rate": 3.431797486796577e-05,
"loss": 0.834985595703125,
"step": 20000
},
{
"epoch": 0.7439666122300853,
"learning_rate": 3.417228191586232e-05,
"loss": 0.80064453125,
"step": 20500
},
{
"epoch": 0.7621121393576483,
"learning_rate": 3.402658896375888e-05,
"loss": 0.8269353637695313,
"step": 21000
},
{
"epoch": 0.7802576664852114,
"learning_rate": 3.3880896011655436e-05,
"loss": 0.8046528930664063,
"step": 21500
},
{
"epoch": 0.7984031936127745,
"learning_rate": 3.3735203059551996e-05,
"loss": 0.7941847534179688,
"step": 22000
},
{
"epoch": 0.8165487207403375,
"learning_rate": 3.358951010744855e-05,
"loss": 0.8443350219726562,
"step": 22500
},
{
"epoch": 0.8346942478679006,
"learning_rate": 3.344381715534511e-05,
"loss": 0.8400850219726562,
"step": 23000
},
{
"epoch": 0.8528397749954636,
"learning_rate": 3.329812420324167e-05,
"loss": 0.7925293579101562,
"step": 23500
},
{
"epoch": 0.8709853021230267,
"learning_rate": 3.315243125113823e-05,
"loss": 0.809871337890625,
"step": 24000
},
{
"epoch": 0.8891308292505897,
"learning_rate": 3.300673829903479e-05,
"loss": 0.8267896728515625,
"step": 24500
},
{
"epoch": 0.9072763563781527,
"learning_rate": 3.2861045346931345e-05,
"loss": 0.7969192504882813,
"step": 25000
},
{
"epoch": 0.9254218835057159,
"learning_rate": 3.2715352394827905e-05,
"loss": 0.821365234375,
"step": 25500
},
{
"epoch": 0.9435674106332789,
"learning_rate": 3.256965944272446e-05,
"loss": 0.8019456787109375,
"step": 26000
},
{
"epoch": 0.961712937760842,
"learning_rate": 3.242396649062102e-05,
"loss": 0.7933065795898437,
"step": 26500
},
{
"epoch": 0.979858464888405,
"learning_rate": 3.227827353851757e-05,
"loss": 0.833029296875,
"step": 27000
},
{
"epoch": 0.998003992015968,
"learning_rate": 3.213258058641413e-05,
"loss": 0.8048839111328125,
"step": 27500
},
{
"epoch": 1.0,
"eval_accuracy": 0.673336556959681,
"eval_accuracy_not_balanced": 0.719448871523315,
"eval_f1": 0.7166208604108357,
"eval_loss": 0.8470854759216309,
"eval_precision": 0.721835226975883,
"eval_recall": 0.719448871523315,
"step": 27555
},
{
"epoch": 1.016149519143531,
"learning_rate": 3.1986887634310694e-05,
"loss": 0.6081535034179687,
"step": 28000
},
{
"epoch": 1.0342950462710943,
"learning_rate": 3.184119468220725e-05,
"loss": 0.6581923217773438,
"step": 28500
},
{
"epoch": 1.0524405733986573,
"learning_rate": 3.169550173010381e-05,
"loss": 0.6415296020507812,
"step": 29000
},
{
"epoch": 1.0705861005262203,
"learning_rate": 3.154980877800037e-05,
"loss": 0.636827392578125,
"step": 29500
},
{
"epoch": 1.0887316276537833,
"learning_rate": 3.140411582589693e-05,
"loss": 0.6284669189453125,
"step": 30000
},
{
"epoch": 1.1068771547813463,
"learning_rate": 3.125842287379348e-05,
"loss": 0.6636114501953125,
"step": 30500
},
{
"epoch": 1.1250226819089095,
"learning_rate": 3.111272992169004e-05,
"loss": 0.7081345825195312,
"step": 31000
},
{
"epoch": 1.1431682090364725,
"learning_rate": 3.0967036969586596e-05,
"loss": 0.6273524169921875,
"step": 31500
},
{
"epoch": 1.1613137361640355,
"learning_rate": 3.0821344017483156e-05,
"loss": 0.6474124145507812,
"step": 32000
},
{
"epoch": 1.1794592632915986,
"learning_rate": 3.0675651065379716e-05,
"loss": 0.624934326171875,
"step": 32500
},
{
"epoch": 1.1976047904191618,
"learning_rate": 3.052995811327627e-05,
"loss": 0.627742431640625,
"step": 33000
},
{
"epoch": 1.2157503175467248,
"learning_rate": 3.0384265161172834e-05,
"loss": 0.6321390380859375,
"step": 33500
},
{
"epoch": 1.2338958446742878,
"learning_rate": 3.0238572209069387e-05,
"loss": 0.6632438354492187,
"step": 34000
},
{
"epoch": 1.2520413718018508,
"learning_rate": 3.0092879256965948e-05,
"loss": 0.687524169921875,
"step": 34500
},
{
"epoch": 1.2701868989294138,
"learning_rate": 2.9947186304862504e-05,
"loss": 0.6545087280273437,
"step": 35000
},
{
"epoch": 1.2883324260569768,
"learning_rate": 2.9801493352759065e-05,
"loss": 0.67624365234375,
"step": 35500
},
{
"epoch": 1.30647795318454,
"learning_rate": 2.965580040065562e-05,
"loss": 0.6588964233398438,
"step": 36000
},
{
"epoch": 1.324623480312103,
"learning_rate": 2.951010744855218e-05,
"loss": 0.696208251953125,
"step": 36500
},
{
"epoch": 1.342769007439666,
"learning_rate": 2.936441449644874e-05,
"loss": 0.6372421264648438,
"step": 37000
},
{
"epoch": 1.360914534567229,
"learning_rate": 2.9218721544345293e-05,
"loss": 0.7146538696289062,
"step": 37500
},
{
"epoch": 1.3790600616947923,
"learning_rate": 2.9073028592241853e-05,
"loss": 0.626543212890625,
"step": 38000
},
{
"epoch": 1.3972055888223553,
"learning_rate": 2.892733564013841e-05,
"loss": 0.6519877319335937,
"step": 38500
},
{
"epoch": 1.4153511159499184,
"learning_rate": 2.878164268803497e-05,
"loss": 0.653167724609375,
"step": 39000
},
{
"epoch": 1.4334966430774814,
"learning_rate": 2.8635949735931524e-05,
"loss": 0.6557488403320313,
"step": 39500
},
{
"epoch": 1.4516421702050444,
"learning_rate": 2.8490256783828084e-05,
"loss": 0.6946817016601563,
"step": 40000
},
{
"epoch": 1.4697876973326074,
"learning_rate": 2.834456383172464e-05,
"loss": 0.6515726318359375,
"step": 40500
},
{
"epoch": 1.4879332244601706,
"learning_rate": 2.81988708796212e-05,
"loss": 0.6651732177734375,
"step": 41000
},
{
"epoch": 1.5060787515877336,
"learning_rate": 2.8053177927517762e-05,
"loss": 0.6083211669921875,
"step": 41500
},
{
"epoch": 1.5242242787152966,
"learning_rate": 2.7907484975414315e-05,
"loss": 0.6926116943359375,
"step": 42000
},
{
"epoch": 1.5423698058428599,
"learning_rate": 2.7761792023310876e-05,
"loss": 0.6213585815429687,
"step": 42500
},
{
"epoch": 1.5605153329704229,
"learning_rate": 2.7616099071207433e-05,
"loss": 0.6704981079101563,
"step": 43000
},
{
"epoch": 1.578660860097986,
"learning_rate": 2.7470406119103993e-05,
"loss": 0.6157642822265625,
"step": 43500
},
{
"epoch": 1.596806387225549,
"learning_rate": 2.7324713167000547e-05,
"loss": 0.6634461059570312,
"step": 44000
},
{
"epoch": 1.614951914353112,
"learning_rate": 2.7179020214897107e-05,
"loss": 0.600257080078125,
"step": 44500
},
{
"epoch": 1.633097441480675,
"learning_rate": 2.7033327262793664e-05,
"loss": 0.6623485107421875,
"step": 45000
},
{
"epoch": 1.651242968608238,
"learning_rate": 2.6887634310690224e-05,
"loss": 0.5999126586914062,
"step": 45500
},
{
"epoch": 1.6693884957358012,
"learning_rate": 2.674194135858678e-05,
"loss": 0.69545458984375,
"step": 46000
},
{
"epoch": 1.6875340228633642,
"learning_rate": 2.6596248406483338e-05,
"loss": 0.607707763671875,
"step": 46500
},
{
"epoch": 1.7056795499909272,
"learning_rate": 2.64505554543799e-05,
"loss": 0.616205322265625,
"step": 47000
},
{
"epoch": 1.7238250771184904,
"learning_rate": 2.6304862502276452e-05,
"loss": 0.6300867309570313,
"step": 47500
},
{
"epoch": 1.7419706042460534,
"learning_rate": 2.6159169550173012e-05,
"loss": 0.6011024780273437,
"step": 48000
},
{
"epoch": 1.7601161313736164,
"learning_rate": 2.601347659806957e-05,
"loss": 0.640543212890625,
"step": 48500
},
{
"epoch": 1.7782616585011795,
"learning_rate": 2.586778364596613e-05,
"loss": 0.6576856079101563,
"step": 49000
},
{
"epoch": 1.7964071856287425,
"learning_rate": 2.5722090693862683e-05,
"loss": 0.6559103393554687,
"step": 49500
},
{
"epoch": 1.8145527127563055,
"learning_rate": 2.5576397741759244e-05,
"loss": 0.5990095825195313,
"step": 50000
},
{
"epoch": 1.8326982398838685,
"learning_rate": 2.5430704789655804e-05,
"loss": 0.665473876953125,
"step": 50500
},
{
"epoch": 1.8508437670114317,
"learning_rate": 2.528501183755236e-05,
"loss": 0.6510874633789062,
"step": 51000
},
{
"epoch": 1.8689892941389947,
"learning_rate": 2.513931888544892e-05,
"loss": 0.6525092163085937,
"step": 51500
},
{
"epoch": 1.8871348212665577,
"learning_rate": 2.4993625933345475e-05,
"loss": 0.621588623046875,
"step": 52000
},
{
"epoch": 1.905280348394121,
"learning_rate": 2.4847932981242035e-05,
"loss": 0.6466250610351563,
"step": 52500
},
{
"epoch": 1.923425875521684,
"learning_rate": 2.4702240029138592e-05,
"loss": 0.6506993408203126,
"step": 53000
},
{
"epoch": 1.941571402649247,
"learning_rate": 2.4556547077035153e-05,
"loss": 0.63045458984375,
"step": 53500
},
{
"epoch": 1.95971692977681,
"learning_rate": 2.4410854124931706e-05,
"loss": 0.6722386474609375,
"step": 54000
},
{
"epoch": 1.977862456904373,
"learning_rate": 2.4265161172828266e-05,
"loss": 0.6592996826171875,
"step": 54500
},
{
"epoch": 1.996007984031936,
"learning_rate": 2.4119468220724827e-05,
"loss": 0.6505784301757812,
"step": 55000
},
{
"epoch": 2.0,
"eval_accuracy": 0.7065592633230761,
"eval_accuracy_not_balanced": 0.7466968279265848,
"eval_f1": 0.7461387954715045,
"eval_loss": 1.030800700187683,
"eval_precision": 0.7471414389441547,
"eval_recall": 0.7466968279265848,
"step": 55110
},
{
"epoch": 2.014153511159499,
"learning_rate": 2.397377526862138e-05,
"loss": 0.4738241882324219,
"step": 55500
},
{
"epoch": 2.032299038287062,
"learning_rate": 2.382808231651794e-05,
"loss": 0.4202625427246094,
"step": 56000
},
{
"epoch": 2.0504445654146255,
"learning_rate": 2.3682389364414498e-05,
"loss": 0.45609054565429685,
"step": 56500
},
{
"epoch": 2.0685900925421885,
"learning_rate": 2.3536696412311058e-05,
"loss": 0.47380087280273436,
"step": 57000
},
{
"epoch": 2.0867356196697515,
"learning_rate": 2.339100346020761e-05,
"loss": 0.43297515869140624,
"step": 57500
},
{
"epoch": 2.1048811467973145,
"learning_rate": 2.3245310508104172e-05,
"loss": 0.45876251220703124,
"step": 58000
},
{
"epoch": 2.1230266739248775,
"learning_rate": 2.309961755600073e-05,
"loss": 0.43873019409179687,
"step": 58500
},
{
"epoch": 2.1411722010524405,
"learning_rate": 2.295392460389729e-05,
"loss": 0.4506067810058594,
"step": 59000
},
{
"epoch": 2.1593177281800036,
"learning_rate": 2.280823165179385e-05,
"loss": 0.434221923828125,
"step": 59500
},
{
"epoch": 2.1774632553075666,
"learning_rate": 2.2662538699690403e-05,
"loss": 0.4482630615234375,
"step": 60000
},
{
"epoch": 2.1956087824351296,
"learning_rate": 2.2516845747586964e-05,
"loss": 0.42872335815429685,
"step": 60500
},
{
"epoch": 2.2137543095626926,
"learning_rate": 2.237115279548352e-05,
"loss": 0.5074381103515625,
"step": 61000
},
{
"epoch": 2.231899836690256,
"learning_rate": 2.222545984338008e-05,
"loss": 0.4730833740234375,
"step": 61500
},
{
"epoch": 2.250045363817819,
"learning_rate": 2.2079766891276634e-05,
"loss": 0.5002789001464844,
"step": 62000
},
{
"epoch": 2.268190890945382,
"learning_rate": 2.1934073939173195e-05,
"loss": 0.5009871215820313,
"step": 62500
},
{
"epoch": 2.286336418072945,
"learning_rate": 2.178838098706975e-05,
"loss": 0.42858175659179687,
"step": 63000
},
{
"epoch": 2.304481945200508,
"learning_rate": 2.1642688034966312e-05,
"loss": 0.47521218872070314,
"step": 63500
},
{
"epoch": 2.322627472328071,
"learning_rate": 2.149699508286287e-05,
"loss": 0.44123291015625,
"step": 64000
},
{
"epoch": 2.340772999455634,
"learning_rate": 2.1351302130759426e-05,
"loss": 0.5162994384765625,
"step": 64500
},
{
"epoch": 2.358918526583197,
"learning_rate": 2.1205609178655986e-05,
"loss": 0.4803743896484375,
"step": 65000
},
{
"epoch": 2.37706405371076,
"learning_rate": 2.105991622655254e-05,
"loss": 0.43930706787109375,
"step": 65500
},
{
"epoch": 2.3952095808383236,
"learning_rate": 2.09142232744491e-05,
"loss": 0.4680689697265625,
"step": 66000
},
{
"epoch": 2.4133551079658866,
"learning_rate": 2.0768530322345657e-05,
"loss": 0.47557147216796875,
"step": 66500
},
{
"epoch": 2.4315006350934496,
"learning_rate": 2.0622837370242218e-05,
"loss": 0.45378570556640624,
"step": 67000
},
{
"epoch": 2.4496461622210126,
"learning_rate": 2.047714441813877e-05,
"loss": 0.49939556884765623,
"step": 67500
},
{
"epoch": 2.4677916893485756,
"learning_rate": 2.033145146603533e-05,
"loss": 0.46348507690429686,
"step": 68000
},
{
"epoch": 2.4859372164761386,
"learning_rate": 2.0185758513931892e-05,
"loss": 0.4665544128417969,
"step": 68500
},
{
"epoch": 2.5040827436037016,
"learning_rate": 2.004006556182845e-05,
"loss": 0.5064073486328124,
"step": 69000
},
{
"epoch": 2.5222282707312647,
"learning_rate": 1.9894372609725006e-05,
"loss": 0.4335787658691406,
"step": 69500
},
{
"epoch": 2.5403737978588277,
"learning_rate": 1.9748679657621563e-05,
"loss": 0.4678582458496094,
"step": 70000
},
{
"epoch": 2.558519324986391,
"learning_rate": 1.960298670551812e-05,
"loss": 0.44978302001953124,
"step": 70500
},
{
"epoch": 2.5766648521139537,
"learning_rate": 1.945729375341468e-05,
"loss": 0.4275310363769531,
"step": 71000
},
{
"epoch": 2.594810379241517,
"learning_rate": 1.931160080131124e-05,
"loss": 0.4475072326660156,
"step": 71500
},
{
"epoch": 2.61295590636908,
"learning_rate": 1.9165907849207797e-05,
"loss": 0.4803515625,
"step": 72000
},
{
"epoch": 2.631101433496643,
"learning_rate": 1.9020214897104354e-05,
"loss": 0.48513922119140623,
"step": 72500
},
{
"epoch": 2.649246960624206,
"learning_rate": 1.887452194500091e-05,
"loss": 0.509461669921875,
"step": 73000
},
{
"epoch": 2.667392487751769,
"learning_rate": 1.8728828992897468e-05,
"loss": 0.439398681640625,
"step": 73500
},
{
"epoch": 2.685538014879332,
"learning_rate": 1.858313604079403e-05,
"loss": 0.49060055541992187,
"step": 74000
},
{
"epoch": 2.703683542006895,
"learning_rate": 1.8437443088690585e-05,
"loss": 0.4656755676269531,
"step": 74500
},
{
"epoch": 2.721829069134458,
"learning_rate": 1.8291750136587142e-05,
"loss": 0.45277435302734376,
"step": 75000
},
{
"epoch": 2.7399745962620212,
"learning_rate": 1.8146057184483703e-05,
"loss": 0.4694291381835938,
"step": 75500
},
{
"epoch": 2.7581201233895847,
"learning_rate": 1.800036423238026e-05,
"loss": 0.4835218811035156,
"step": 76000
},
{
"epoch": 2.7762656505171477,
"learning_rate": 1.785467128027682e-05,
"loss": 0.4075122680664062,
"step": 76500
},
{
"epoch": 2.7944111776447107,
"learning_rate": 1.7708978328173377e-05,
"loss": 0.48038003540039065,
"step": 77000
},
{
"epoch": 2.8125567047722737,
"learning_rate": 1.7563285376069934e-05,
"loss": 0.48810946655273435,
"step": 77500
},
{
"epoch": 2.8307022318998367,
"learning_rate": 1.741759242396649e-05,
"loss": 0.479569580078125,
"step": 78000
},
{
"epoch": 2.8488477590273997,
"learning_rate": 1.7271899471863048e-05,
"loss": 0.43540292358398436,
"step": 78500
},
{
"epoch": 2.8669932861549627,
"learning_rate": 1.7126206519759608e-05,
"loss": 0.448812744140625,
"step": 79000
},
{
"epoch": 2.8851388132825257,
"learning_rate": 1.6980513567656165e-05,
"loss": 0.4721513671875,
"step": 79500
},
{
"epoch": 2.9032843404100888,
"learning_rate": 1.6834820615552726e-05,
"loss": 0.49795864868164064,
"step": 80000
},
{
"epoch": 2.921429867537652,
"learning_rate": 1.6689127663449282e-05,
"loss": 0.43135833740234375,
"step": 80500
},
{
"epoch": 2.939575394665215,
"learning_rate": 1.654343471134584e-05,
"loss": 0.4152744445800781,
"step": 81000
},
{
"epoch": 2.9577209217927782,
"learning_rate": 1.63977417592424e-05,
"loss": 0.4627979736328125,
"step": 81500
},
{
"epoch": 2.9758664489203412,
"learning_rate": 1.6252048807138957e-05,
"loss": 0.5032968444824218,
"step": 82000
},
{
"epoch": 2.9940119760479043,
"learning_rate": 1.6106355855035514e-05,
"loss": 0.44907989501953127,
"step": 82500
},
{
"epoch": 3.0,
"eval_accuracy": 0.719967526681318,
"eval_accuracy_not_balanced": 0.7567220194334482,
"eval_f1": 0.7562519073319182,
"eval_loss": 1.2266414165496826,
"eval_precision": 0.7566245119201862,
"eval_recall": 0.7567220194334482,
"step": 82665
},
{
"epoch": 3.0121575031754673,
"learning_rate": 1.596066290293207e-05,
"loss": 0.3230574645996094,
"step": 83000
},
{
"epoch": 3.0303030303030303,
"learning_rate": 1.5814969950828628e-05,
"loss": 0.2732647399902344,
"step": 83500
},
{
"epoch": 3.0484485574305933,
"learning_rate": 1.5669276998725188e-05,
"loss": 0.2596098327636719,
"step": 84000
},
{
"epoch": 3.0665940845581563,
"learning_rate": 1.5523584046621748e-05,
"loss": 0.2204219970703125,
"step": 84500
},
{
"epoch": 3.0847396116857193,
"learning_rate": 1.5377891094518305e-05,
"loss": 0.2841182861328125,
"step": 85000
},
{
"epoch": 3.1028851388132823,
"learning_rate": 1.5232198142414862e-05,
"loss": 0.2811924133300781,
"step": 85500
},
{
"epoch": 3.1210306659408458,
"learning_rate": 1.5086505190311421e-05,
"loss": 0.2654357604980469,
"step": 86000
},
{
"epoch": 3.139176193068409,
"learning_rate": 1.4940812238207978e-05,
"loss": 0.28281610107421873,
"step": 86500
},
{
"epoch": 3.157321720195972,
"learning_rate": 1.4795119286104536e-05,
"loss": 0.2979160461425781,
"step": 87000
},
{
"epoch": 3.175467247323535,
"learning_rate": 1.4649426334001093e-05,
"loss": 0.30940216064453124,
"step": 87500
},
{
"epoch": 3.193612774451098,
"learning_rate": 1.450373338189765e-05,
"loss": 0.2786048278808594,
"step": 88000
},
{
"epoch": 3.211758301578661,
"learning_rate": 1.4358040429794209e-05,
"loss": 0.3191018981933594,
"step": 88500
},
{
"epoch": 3.229903828706224,
"learning_rate": 1.421234747769077e-05,
"loss": 0.2721468200683594,
"step": 89000
},
{
"epoch": 3.248049355833787,
"learning_rate": 1.4066654525587326e-05,
"loss": 0.2782774963378906,
"step": 89500
},
{
"epoch": 3.26619488296135,
"learning_rate": 1.3920961573483885e-05,
"loss": 0.24969586181640624,
"step": 90000
},
{
"epoch": 3.2843404100889133,
"learning_rate": 1.3775268621380442e-05,
"loss": 0.3144935302734375,
"step": 90500
},
{
"epoch": 3.3024859372164763,
"learning_rate": 1.3629575669277e-05,
"loss": 0.3104676208496094,
"step": 91000
},
{
"epoch": 3.3206314643440393,
"learning_rate": 1.3483882717173558e-05,
"loss": 0.31101416015625,
"step": 91500
},
{
"epoch": 3.3387769914716023,
"learning_rate": 1.3338189765070115e-05,
"loss": 0.2874825744628906,
"step": 92000
},
{
"epoch": 3.3569225185991654,
"learning_rate": 1.3192496812966673e-05,
"loss": 0.30249465942382814,
"step": 92500
},
{
"epoch": 3.3750680457267284,
"learning_rate": 1.304680386086323e-05,
"loss": 0.3033802185058594,
"step": 93000
},
{
"epoch": 3.3932135728542914,
"learning_rate": 1.290111090875979e-05,
"loss": 0.2691561279296875,
"step": 93500
},
{
"epoch": 3.4113590999818544,
"learning_rate": 1.2755417956656349e-05,
"loss": 0.2455874786376953,
"step": 94000
},
{
"epoch": 3.4295046271094174,
"learning_rate": 1.2609725004552906e-05,
"loss": 0.24622802734375,
"step": 94500
},
{
"epoch": 3.4476501542369804,
"learning_rate": 1.2464032052449465e-05,
"loss": 0.29979901123046876,
"step": 95000
},
{
"epoch": 3.4657956813645434,
"learning_rate": 1.2318339100346022e-05,
"loss": 0.28493136596679686,
"step": 95500
},
{
"epoch": 3.483941208492107,
"learning_rate": 1.217264614824258e-05,
"loss": 0.22248597717285157,
"step": 96000
},
{
"epoch": 3.50208673561967,
"learning_rate": 1.2026953196139137e-05,
"loss": 0.29626983642578125,
"step": 96500
},
{
"epoch": 3.520232262747233,
"learning_rate": 1.1881260244035694e-05,
"loss": 0.22462777709960938,
"step": 97000
},
{
"epoch": 3.538377789874796,
"learning_rate": 1.1735567291932253e-05,
"loss": 0.31633203125,
"step": 97500
},
{
"epoch": 3.556523317002359,
"learning_rate": 1.1589874339828813e-05,
"loss": 0.29999240112304687,
"step": 98000
},
{
"epoch": 3.574668844129922,
"learning_rate": 1.144418138772537e-05,
"loss": 0.3205788879394531,
"step": 98500
},
{
"epoch": 3.592814371257485,
"learning_rate": 1.1298488435621929e-05,
"loss": 0.31732611083984374,
"step": 99000
},
{
"epoch": 3.610959898385048,
"learning_rate": 1.1152795483518486e-05,
"loss": 0.2857419128417969,
"step": 99500
},
{
"epoch": 3.629105425512611,
"learning_rate": 1.1007102531415044e-05,
"loss": 0.2731871032714844,
"step": 100000
},
{
"epoch": 3.6472509526401744,
"learning_rate": 1.0861409579311601e-05,
"loss": 0.2926119079589844,
"step": 100500
},
{
"epoch": 3.6653964797677374,
"learning_rate": 1.0715716627208158e-05,
"loss": 0.2275234375,
"step": 101000
},
{
"epoch": 3.6835420068953004,
"learning_rate": 1.0570023675104717e-05,
"loss": 0.2467783660888672,
"step": 101500
},
{
"epoch": 3.7016875340228634,
"learning_rate": 1.0424330723001274e-05,
"loss": 0.29310882568359375,
"step": 102000
},
{
"epoch": 3.7198330611504264,
"learning_rate": 1.0278637770897834e-05,
"loss": 0.2742635803222656,
"step": 102500
},
{
"epoch": 3.7379785882779895,
"learning_rate": 1.0132944818794393e-05,
"loss": 0.3342573547363281,
"step": 103000
},
{
"epoch": 3.7561241154055525,
"learning_rate": 9.98725186669095e-06,
"loss": 0.2816724548339844,
"step": 103500
},
{
"epoch": 3.7742696425331155,
"learning_rate": 9.841558914587509e-06,
"loss": 0.2820220947265625,
"step": 104000
},
{
"epoch": 3.7924151696606785,
"learning_rate": 9.695865962484066e-06,
"loss": 0.26842498779296875,
"step": 104500
},
{
"epoch": 3.810560696788242,
"learning_rate": 9.550173010380624e-06,
"loss": 0.2902957458496094,
"step": 105000
},
{
"epoch": 3.8287062239158045,
"learning_rate": 9.404480058277181e-06,
"loss": 0.2857991943359375,
"step": 105500
},
{
"epoch": 3.846851751043368,
"learning_rate": 9.25878710617374e-06,
"loss": 0.27429071044921877,
"step": 106000
},
{
"epoch": 3.864997278170931,
"learning_rate": 9.113094154070298e-06,
"loss": 0.297792724609375,
"step": 106500
},
{
"epoch": 3.883142805298494,
"learning_rate": 8.967401201966855e-06,
"loss": 0.24031161499023437,
"step": 107000
},
{
"epoch": 3.901288332426057,
"learning_rate": 8.821708249863414e-06,
"loss": 0.2540105285644531,
"step": 107500
},
{
"epoch": 3.91943385955362,
"learning_rate": 8.676015297759973e-06,
"loss": 0.19631956481933593,
"step": 108000
},
{
"epoch": 3.937579386681183,
"learning_rate": 8.53032234565653e-06,
"loss": 0.283151611328125,
"step": 108500
},
{
"epoch": 3.955724913808746,
"learning_rate": 8.384629393553088e-06,
"loss": 0.25643258666992186,
"step": 109000
},
{
"epoch": 3.973870440936309,
"learning_rate": 8.238936441449645e-06,
"loss": 0.28699124145507815,
"step": 109500
},
{
"epoch": 3.992015968063872,
"learning_rate": 8.093243489346202e-06,
"loss": 0.28246923828125,
"step": 110000
},
{
"epoch": 4.0,
"eval_accuracy": 0.7364013629648626,
"eval_accuracy_not_balanced": 0.7658732198858671,
"eval_f1": 0.7663226389044607,
"eval_loss": 1.4270503520965576,
"eval_precision": 0.7674868056440829,
"eval_recall": 0.7658732198858671,
"step": 110220
},
{
"epoch": 4.0101614951914355,
"learning_rate": 7.947550537242763e-06,
"loss": 0.1843712615966797,
"step": 110500
},
{
"epoch": 4.028307022318998,
"learning_rate": 7.80185758513932e-06,
"loss": 0.12554346466064453,
"step": 111000
},
{
"epoch": 4.0464525494465615,
"learning_rate": 7.656164633035878e-06,
"loss": 0.1526964111328125,
"step": 111500
},
{
"epoch": 4.064598076574124,
"learning_rate": 7.510471680932435e-06,
"loss": 0.1175911865234375,
"step": 112000
},
{
"epoch": 4.0827436037016875,
"learning_rate": 7.364778728828994e-06,
"loss": 0.13503671264648437,
"step": 112500
},
{
"epoch": 4.100889130829251,
"learning_rate": 7.219085776725552e-06,
"loss": 0.15723780822753905,
"step": 113000
},
{
"epoch": 4.119034657956814,
"learning_rate": 7.073392824622109e-06,
"loss": 0.14341749572753906,
"step": 113500
},
{
"epoch": 4.137180185084377,
"learning_rate": 6.927699872518667e-06,
"loss": 0.17015084838867187,
"step": 114000
},
{
"epoch": 4.15532571221194,
"learning_rate": 6.782006920415225e-06,
"loss": 0.1238730239868164,
"step": 114500
},
{
"epoch": 4.173471239339503,
"learning_rate": 6.636313968311784e-06,
"loss": 0.14837001037597655,
"step": 115000
},
{
"epoch": 4.191616766467066,
"learning_rate": 6.4906210162083415e-06,
"loss": 0.15362150573730468,
"step": 115500
},
{
"epoch": 4.209762293594629,
"learning_rate": 6.344928064104899e-06,
"loss": 0.13405267333984375,
"step": 116000
},
{
"epoch": 4.227907820722192,
"learning_rate": 6.199235112001457e-06,
"loss": 0.13654838562011717,
"step": 116500
},
{
"epoch": 4.246053347849755,
"learning_rate": 6.053542159898016e-06,
"loss": 0.1306242218017578,
"step": 117000
},
{
"epoch": 4.264198874977318,
"learning_rate": 5.9078492077945735e-06,
"loss": 0.16896232604980468,
"step": 117500
},
{
"epoch": 4.282344402104881,
"learning_rate": 5.762156255691131e-06,
"loss": 0.14876005554199218,
"step": 118000
},
{
"epoch": 4.3004899292324446,
"learning_rate": 5.616463303587689e-06,
"loss": 0.18081915283203126,
"step": 118500
},
{
"epoch": 4.318635456360007,
"learning_rate": 5.470770351484247e-06,
"loss": 0.13800569152832032,
"step": 119000
},
{
"epoch": 4.336780983487571,
"learning_rate": 5.325077399380806e-06,
"loss": 0.12778076934814453,
"step": 119500
},
{
"epoch": 4.354926510615133,
"learning_rate": 5.179384447277363e-06,
"loss": 0.16787666320800781,
"step": 120000
},
{
"epoch": 4.373072037742697,
"learning_rate": 5.033691495173921e-06,
"loss": 0.1471616668701172,
"step": 120500
},
{
"epoch": 4.391217564870259,
"learning_rate": 4.88799854307048e-06,
"loss": 0.14897169494628906,
"step": 121000
},
{
"epoch": 4.409363091997823,
"learning_rate": 4.742305590967038e-06,
"loss": 0.10772408294677735,
"step": 121500
},
{
"epoch": 4.427508619125385,
"learning_rate": 4.5966126388635955e-06,
"loss": 0.16820535278320312,
"step": 122000
},
{
"epoch": 4.445654146252949,
"learning_rate": 4.450919686760153e-06,
"loss": 0.15158670043945313,
"step": 122500
},
{
"epoch": 4.463799673380512,
"learning_rate": 4.305226734656711e-06,
"loss": 0.1802527618408203,
"step": 123000
},
{
"epoch": 4.481945200508075,
"learning_rate": 4.15953378255327e-06,
"loss": 0.12233680725097656,
"step": 123500
},
{
"epoch": 4.500090727635638,
"learning_rate": 4.0138408304498275e-06,
"loss": 0.13587774658203125,
"step": 124000
},
{
"epoch": 4.518236254763201,
"learning_rate": 3.868147878346385e-06,
"loss": 0.1264299087524414,
"step": 124500
},
{
"epoch": 4.536381781890764,
"learning_rate": 3.722454926242943e-06,
"loss": 0.15187008666992188,
"step": 125000
},
{
"epoch": 4.554527309018327,
"learning_rate": 3.5767619741395014e-06,
"loss": 0.13250352478027344,
"step": 125500
},
{
"epoch": 4.57267283614589,
"learning_rate": 3.431069022036059e-06,
"loss": 0.11472991180419922,
"step": 126000
},
{
"epoch": 4.590818363273453,
"learning_rate": 3.285376069932617e-06,
"loss": 0.17263003540039062,
"step": 126500
},
{
"epoch": 4.608963890401016,
"learning_rate": 3.139683117829175e-06,
"loss": 0.1344503173828125,
"step": 127000
},
{
"epoch": 4.62710941752858,
"learning_rate": 2.993990165725733e-06,
"loss": 0.1357424774169922,
"step": 127500
},
{
"epoch": 4.645254944656142,
"learning_rate": 2.8482972136222912e-06,
"loss": 0.12101192474365234,
"step": 128000
},
{
"epoch": 4.663400471783706,
"learning_rate": 2.702604261518849e-06,
"loss": 0.13540719604492188,
"step": 128500
},
{
"epoch": 4.681545998911268,
"learning_rate": 2.5569113094154073e-06,
"loss": 0.14367697143554686,
"step": 129000
},
{
"epoch": 4.699691526038832,
"learning_rate": 2.411218357311965e-06,
"loss": 0.13124786376953124,
"step": 129500
},
{
"epoch": 4.717837053166394,
"learning_rate": 2.2655254052085233e-06,
"loss": 0.147208984375,
"step": 130000
},
{
"epoch": 4.735982580293958,
"learning_rate": 2.119832453105081e-06,
"loss": 0.15938619995117187,
"step": 130500
},
{
"epoch": 4.75412810742152,
"learning_rate": 1.9741395010016393e-06,
"loss": 0.1515483093261719,
"step": 131000
},
{
"epoch": 4.772273634549084,
"learning_rate": 1.8284465488981974e-06,
"loss": 0.14191549682617188,
"step": 131500
},
{
"epoch": 4.790419161676647,
"learning_rate": 1.6827535967947554e-06,
"loss": 0.13431153869628906,
"step": 132000
},
{
"epoch": 4.80856468880421,
"learning_rate": 1.537060644691313e-06,
"loss": 0.16412876892089845,
"step": 132500
},
{
"epoch": 4.826710215931773,
"learning_rate": 1.391367692587871e-06,
"loss": 0.15667465209960937,
"step": 133000
},
{
"epoch": 4.844855743059336,
"learning_rate": 1.2456747404844292e-06,
"loss": 0.11217301940917969,
"step": 133500
},
{
"epoch": 4.863001270186899,
"learning_rate": 1.0999817883809872e-06,
"loss": 0.13955929565429687,
"step": 134000
},
{
"epoch": 4.881146797314462,
"learning_rate": 9.542888362775452e-07,
"loss": 0.1466508331298828,
"step": 134500
},
{
"epoch": 4.899292324442025,
"learning_rate": 8.085958841741031e-07,
"loss": 0.16482601928710938,
"step": 135000
},
{
"epoch": 4.917437851569588,
"learning_rate": 6.629029320706612e-07,
"loss": 0.1277398147583008,
"step": 135500
},
{
"epoch": 4.935583378697151,
"learning_rate": 5.172099799672192e-07,
"loss": 0.12898980712890626,
"step": 136000
},
{
"epoch": 4.953728905824715,
"learning_rate": 3.715170278637771e-07,
"loss": 0.16622706604003906,
"step": 136500
},
{
"epoch": 4.971874432952277,
"learning_rate": 2.258240757603351e-07,
"loss": 0.1129389190673828,
"step": 137000
},
{
"epoch": 4.99001996007984,
"learning_rate": 8.013112365689311e-08,
"loss": 0.11422783660888672,
"step": 137500
},
{
"epoch": 5.0,
"eval_accuracy": 0.7449912424577604,
"eval_accuracy_not_balanced": 0.7708601100200504,
"eval_f1": 0.771311438926646,
"eval_loss": 1.6117682456970215,
"eval_precision": 0.7719334049472679,
"eval_recall": 0.7708601100200504,
"step": 137775
}
],
"max_steps": 137775,
"num_train_epochs": 5,
"total_flos": 43097270746046340,
"trial_name": null,
"trial_params": null
}