|
{ |
|
"best_metric": 0.6680810938236681, |
|
"best_model_checkpoint": "videomae-base-finetuned-chickenbehaviour-2/checkpoint-23865", |
|
"epoch": 79.01218956303049, |
|
"eval_steps": 500, |
|
"global_step": 127240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 34.09493637084961, |
|
"learning_rate": 6.251964790946243e-06, |
|
"loss": 2.035, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.5115511551155115, |
|
"eval_f1": 0.4139359299241216, |
|
"eval_loss": 1.6961450576782227, |
|
"eval_precision": 0.426389188260245, |
|
"eval_recall": 0.5115511551155115, |
|
"eval_runtime": 443.2234, |
|
"eval_samples_per_second": 9.571, |
|
"eval_steps_per_second": 1.198, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 25.231983184814453, |
|
"learning_rate": 1.2503929581892487e-05, |
|
"loss": 1.5431, |
|
"step": 3182 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.5898161244695899, |
|
"eval_f1": 0.5166622256451852, |
|
"eval_loss": 1.439492106437683, |
|
"eval_precision": 0.5240058255527255, |
|
"eval_recall": 0.5898161244695899, |
|
"eval_runtime": 437.564, |
|
"eval_samples_per_second": 9.695, |
|
"eval_steps_per_second": 1.214, |
|
"step": 3182 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 24.721105575561523, |
|
"learning_rate": 1.875589437283873e-05, |
|
"loss": 1.4118, |
|
"step": 4773 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.6051390853371051, |
|
"eval_f1": 0.5535236961754626, |
|
"eval_loss": 1.363150715827942, |
|
"eval_precision": 0.5552721379615939, |
|
"eval_recall": 0.6051390853371051, |
|
"eval_runtime": 683.9412, |
|
"eval_samples_per_second": 6.202, |
|
"eval_steps_per_second": 0.776, |
|
"step": 4773 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 7.513462066650391, |
|
"learning_rate": 2.5007859163784973e-05, |
|
"loss": 1.3413, |
|
"step": 6364 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.6020744931636021, |
|
"eval_f1": 0.5384080508073906, |
|
"eval_loss": 1.331170678138733, |
|
"eval_precision": 0.551611875314686, |
|
"eval_recall": 0.6020744931636021, |
|
"eval_runtime": 680.73, |
|
"eval_samples_per_second": 6.232, |
|
"eval_steps_per_second": 0.78, |
|
"step": 6364 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 23.583574295043945, |
|
"learning_rate": 3.125982395473121e-05, |
|
"loss": 1.2969, |
|
"step": 7955 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_accuracy": 0.6211692597831212, |
|
"eval_f1": 0.5663241746963936, |
|
"eval_loss": 1.273905873298645, |
|
"eval_precision": 0.6121530259266542, |
|
"eval_recall": 0.6211692597831212, |
|
"eval_runtime": 663.3464, |
|
"eval_samples_per_second": 6.395, |
|
"eval_steps_per_second": 0.8, |
|
"step": 7955 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 12.484142303466797, |
|
"learning_rate": 3.751178874567746e-05, |
|
"loss": 1.2636, |
|
"step": 9546 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_accuracy": 0.6058462989156058, |
|
"eval_f1": 0.5429975108084805, |
|
"eval_loss": 1.3211930990219116, |
|
"eval_precision": 0.6186956973637323, |
|
"eval_recall": 0.6058462989156058, |
|
"eval_runtime": 661.8451, |
|
"eval_samples_per_second": 6.409, |
|
"eval_steps_per_second": 0.802, |
|
"step": 9546 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 6.579521179199219, |
|
"learning_rate": 4.3763753536623704e-05, |
|
"loss": 1.2231, |
|
"step": 11137 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"eval_accuracy": 0.6242338519566243, |
|
"eval_f1": 0.5747264230461347, |
|
"eval_loss": 1.2543139457702637, |
|
"eval_precision": 0.6460983768240532, |
|
"eval_recall": 0.6242338519566243, |
|
"eval_runtime": 669.9717, |
|
"eval_samples_per_second": 6.332, |
|
"eval_steps_per_second": 0.793, |
|
"step": 11137 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 12.255209922790527, |
|
"learning_rate": 4.99982535191589e-05, |
|
"loss": 1.1989, |
|
"step": 12728 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_accuracy": 0.6404997642621405, |
|
"eval_f1": 0.5869125503826516, |
|
"eval_loss": 1.2377874851226807, |
|
"eval_precision": 0.6356351683189131, |
|
"eval_recall": 0.6404997642621405, |
|
"eval_runtime": 678.5987, |
|
"eval_samples_per_second": 6.251, |
|
"eval_steps_per_second": 0.782, |
|
"step": 12728 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 9.07247257232666, |
|
"learning_rate": 4.9303590764609316e-05, |
|
"loss": 1.1566, |
|
"step": 14319 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"eval_accuracy": 0.6527581329561527, |
|
"eval_f1": 0.594207871765373, |
|
"eval_loss": 1.2123682498931885, |
|
"eval_precision": 0.6198764093734253, |
|
"eval_recall": 0.6527581329561527, |
|
"eval_runtime": 667.9014, |
|
"eval_samples_per_second": 6.351, |
|
"eval_steps_per_second": 0.795, |
|
"step": 14319 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 13.849576950073242, |
|
"learning_rate": 4.8608928010059734e-05, |
|
"loss": 1.1145, |
|
"step": 15910 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"eval_accuracy": 0.6475719000471476, |
|
"eval_f1": 0.605227949854272, |
|
"eval_loss": 1.1802877187728882, |
|
"eval_precision": 0.6340925434803425, |
|
"eval_recall": 0.6475719000471476, |
|
"eval_runtime": 672.4937, |
|
"eval_samples_per_second": 6.308, |
|
"eval_steps_per_second": 0.79, |
|
"step": 15910 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"grad_norm": 26.01058006286621, |
|
"learning_rate": 4.791426525551015e-05, |
|
"loss": 1.0567, |
|
"step": 17501 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"eval_accuracy": 0.6265912305516266, |
|
"eval_f1": 0.5969458291906395, |
|
"eval_loss": 1.2576700448989868, |
|
"eval_precision": 0.6278990782548687, |
|
"eval_recall": 0.6265912305516266, |
|
"eval_runtime": 662.684, |
|
"eval_samples_per_second": 6.401, |
|
"eval_steps_per_second": 0.801, |
|
"step": 17501 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"grad_norm": 4.436341285705566, |
|
"learning_rate": 4.721960250096057e-05, |
|
"loss": 1.0172, |
|
"step": 19092 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"eval_accuracy": 0.657001414427157, |
|
"eval_f1": 0.6082570131553916, |
|
"eval_loss": 1.1961308717727661, |
|
"eval_precision": 0.6369039125738234, |
|
"eval_recall": 0.657001414427157, |
|
"eval_runtime": 753.1481, |
|
"eval_samples_per_second": 5.632, |
|
"eval_steps_per_second": 0.705, |
|
"step": 19092 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"grad_norm": 12.711715698242188, |
|
"learning_rate": 4.652493974641098e-05, |
|
"loss": 0.9817, |
|
"step": 20683 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"eval_accuracy": 0.6619519094766619, |
|
"eval_f1": 0.6048958777519221, |
|
"eval_loss": 1.2287497520446777, |
|
"eval_precision": 0.6498679093091749, |
|
"eval_recall": 0.6619519094766619, |
|
"eval_runtime": 765.9815, |
|
"eval_samples_per_second": 5.538, |
|
"eval_steps_per_second": 0.693, |
|
"step": 20683 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"grad_norm": 6.408486366271973, |
|
"learning_rate": 4.583027699186141e-05, |
|
"loss": 0.9279, |
|
"step": 22274 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"eval_accuracy": 0.6548797736916548, |
|
"eval_f1": 0.6213080237690763, |
|
"eval_loss": 1.2358391284942627, |
|
"eval_precision": 0.6503596413080538, |
|
"eval_recall": 0.6548797736916548, |
|
"eval_runtime": 754.2864, |
|
"eval_samples_per_second": 5.624, |
|
"eval_steps_per_second": 0.704, |
|
"step": 22274 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"grad_norm": 14.318334579467773, |
|
"learning_rate": 4.513561423731182e-05, |
|
"loss": 0.8913, |
|
"step": 23865 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"eval_accuracy": 0.6680810938236681, |
|
"eval_f1": 0.6307923814434653, |
|
"eval_loss": 1.181541919708252, |
|
"eval_precision": 0.6325361512013947, |
|
"eval_recall": 0.6680810938236681, |
|
"eval_runtime": 766.2903, |
|
"eval_samples_per_second": 5.536, |
|
"eval_steps_per_second": 0.693, |
|
"step": 23865 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"grad_norm": 14.112875938415527, |
|
"learning_rate": 4.444095148276224e-05, |
|
"loss": 0.8559, |
|
"step": 25456 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"eval_accuracy": 0.639085337105139, |
|
"eval_f1": 0.6036680470772597, |
|
"eval_loss": 1.3211859464645386, |
|
"eval_precision": 0.639173575618385, |
|
"eval_recall": 0.639085337105139, |
|
"eval_runtime": 741.2024, |
|
"eval_samples_per_second": 5.723, |
|
"eval_steps_per_second": 0.716, |
|
"step": 25456 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"grad_norm": 19.837888717651367, |
|
"learning_rate": 4.3746288728212655e-05, |
|
"loss": 0.8083, |
|
"step": 27047 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"eval_accuracy": 0.6230551626591231, |
|
"eval_f1": 0.6006019601279614, |
|
"eval_loss": 1.307276725769043, |
|
"eval_precision": 0.6250962347990193, |
|
"eval_recall": 0.6230551626591231, |
|
"eval_runtime": 755.5211, |
|
"eval_samples_per_second": 5.615, |
|
"eval_steps_per_second": 0.703, |
|
"step": 27047 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"grad_norm": 7.695098876953125, |
|
"learning_rate": 4.305162597366307e-05, |
|
"loss": 0.7662, |
|
"step": 28638 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"eval_accuracy": 0.6461574728901461, |
|
"eval_f1": 0.6214078287802024, |
|
"eval_loss": 1.2981834411621094, |
|
"eval_precision": 0.625200272023183, |
|
"eval_recall": 0.6461574728901461, |
|
"eval_runtime": 760.7104, |
|
"eval_samples_per_second": 5.576, |
|
"eval_steps_per_second": 0.698, |
|
"step": 28638 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"grad_norm": 17.791141510009766, |
|
"learning_rate": 4.2356963219113485e-05, |
|
"loss": 0.7363, |
|
"step": 30229 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"eval_accuracy": 0.6574728901461575, |
|
"eval_f1": 0.6264016299127368, |
|
"eval_loss": 1.3019486665725708, |
|
"eval_precision": 0.6427931632515602, |
|
"eval_recall": 0.6574728901461575, |
|
"eval_runtime": 732.8997, |
|
"eval_samples_per_second": 5.788, |
|
"eval_steps_per_second": 0.725, |
|
"step": 30229 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"grad_norm": 12.370431900024414, |
|
"learning_rate": 4.166230046456391e-05, |
|
"loss": 0.6787, |
|
"step": 31820 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_accuracy": 0.6511079679396511, |
|
"eval_f1": 0.6229627569285623, |
|
"eval_loss": 1.3867465257644653, |
|
"eval_precision": 0.6368072307821887, |
|
"eval_recall": 0.6511079679396511, |
|
"eval_runtime": 662.5188, |
|
"eval_samples_per_second": 6.403, |
|
"eval_steps_per_second": 0.801, |
|
"step": 31820 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"grad_norm": 10.867647171020508, |
|
"learning_rate": 4.096763771001432e-05, |
|
"loss": 0.6433, |
|
"step": 33411 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"eval_accuracy": 0.6364922206506365, |
|
"eval_f1": 0.6138991311356594, |
|
"eval_loss": 1.4018532037734985, |
|
"eval_precision": 0.6374756941286471, |
|
"eval_recall": 0.6364922206506365, |
|
"eval_runtime": 667.8552, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 0.795, |
|
"step": 33411 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"grad_norm": 23.982513427734375, |
|
"learning_rate": 4.0272974955464746e-05, |
|
"loss": 0.5969, |
|
"step": 35002 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"eval_accuracy": 0.6341348420556341, |
|
"eval_f1": 0.6104193509325564, |
|
"eval_loss": 1.4419147968292236, |
|
"eval_precision": 0.6211641893240438, |
|
"eval_recall": 0.6341348420556341, |
|
"eval_runtime": 657.2327, |
|
"eval_samples_per_second": 6.454, |
|
"eval_steps_per_second": 0.808, |
|
"step": 35002 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"grad_norm": 4.0233635902404785, |
|
"learning_rate": 3.957831220091516e-05, |
|
"loss": 0.563, |
|
"step": 36593 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"eval_accuracy": 0.6508722300801508, |
|
"eval_f1": 0.6170283959147944, |
|
"eval_loss": 1.4777988195419312, |
|
"eval_precision": 0.6292668904937919, |
|
"eval_recall": 0.6508722300801508, |
|
"eval_runtime": 657.9566, |
|
"eval_samples_per_second": 6.447, |
|
"eval_steps_per_second": 0.807, |
|
"step": 36593 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"grad_norm": 23.503877639770508, |
|
"learning_rate": 3.8883649446365576e-05, |
|
"loss": 0.5252, |
|
"step": 38184 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"eval_accuracy": 0.6433286185761433, |
|
"eval_f1": 0.6213976507987007, |
|
"eval_loss": 1.486406683921814, |
|
"eval_precision": 0.6316274985409115, |
|
"eval_recall": 0.6433286185761433, |
|
"eval_runtime": 652.8988, |
|
"eval_samples_per_second": 6.497, |
|
"eval_steps_per_second": 0.813, |
|
"step": 38184 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"grad_norm": 29.642290115356445, |
|
"learning_rate": 3.8188986691815994e-05, |
|
"loss": 0.5, |
|
"step": 39775 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"eval_accuracy": 0.6232909005186232, |
|
"eval_f1": 0.6023496407577721, |
|
"eval_loss": 1.6704081296920776, |
|
"eval_precision": 0.6273005697774543, |
|
"eval_recall": 0.6232909005186232, |
|
"eval_runtime": 651.691, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 0.815, |
|
"step": 39775 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"grad_norm": 0.25407856702804565, |
|
"learning_rate": 3.749432393726641e-05, |
|
"loss": 0.4622, |
|
"step": 41366 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"eval_accuracy": 0.6487505893446488, |
|
"eval_f1": 0.6119499708020253, |
|
"eval_loss": 1.665787935256958, |
|
"eval_precision": 0.6259817520615986, |
|
"eval_recall": 0.6487505893446488, |
|
"eval_runtime": 664.0356, |
|
"eval_samples_per_second": 6.388, |
|
"eval_steps_per_second": 0.8, |
|
"step": 41366 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"grad_norm": 12.53039836883545, |
|
"learning_rate": 3.6799661182716824e-05, |
|
"loss": 0.4292, |
|
"step": 42957 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"eval_accuracy": 0.6494578029231495, |
|
"eval_f1": 0.6242771027666587, |
|
"eval_loss": 1.6428192853927612, |
|
"eval_precision": 0.6286829818492489, |
|
"eval_recall": 0.6494578029231495, |
|
"eval_runtime": 673.3255, |
|
"eval_samples_per_second": 6.3, |
|
"eval_steps_per_second": 0.789, |
|
"step": 42957 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"grad_norm": 2.96919846534729, |
|
"learning_rate": 3.610499842816725e-05, |
|
"loss": 0.4044, |
|
"step": 44548 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"eval_accuracy": 0.6586515794436586, |
|
"eval_f1": 0.6387100670675616, |
|
"eval_loss": 1.6702899932861328, |
|
"eval_precision": 0.6310773161399348, |
|
"eval_recall": 0.6586515794436586, |
|
"eval_runtime": 657.3579, |
|
"eval_samples_per_second": 6.453, |
|
"eval_steps_per_second": 0.808, |
|
"step": 44548 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"grad_norm": 2.961900472640991, |
|
"learning_rate": 3.541033567361766e-05, |
|
"loss": 0.3952, |
|
"step": 46139 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"eval_accuracy": 0.632956152758133, |
|
"eval_f1": 0.6123356581191824, |
|
"eval_loss": 1.7576137781143188, |
|
"eval_precision": 0.6170827944289541, |
|
"eval_recall": 0.632956152758133, |
|
"eval_runtime": 659.0162, |
|
"eval_samples_per_second": 6.437, |
|
"eval_steps_per_second": 0.806, |
|
"step": 46139 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"grad_norm": 17.30776596069336, |
|
"learning_rate": 3.471567291906808e-05, |
|
"loss": 0.3681, |
|
"step": 47730 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"eval_accuracy": 0.6553512494106554, |
|
"eval_f1": 0.6231268362993411, |
|
"eval_loss": 1.9031915664672852, |
|
"eval_precision": 0.6349163976683712, |
|
"eval_recall": 0.6553512494106554, |
|
"eval_runtime": 657.4951, |
|
"eval_samples_per_second": 6.452, |
|
"eval_steps_per_second": 0.808, |
|
"step": 47730 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"grad_norm": 15.424605369567871, |
|
"learning_rate": 3.40210101645185e-05, |
|
"loss": 0.3541, |
|
"step": 49321 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"eval_accuracy": 0.6445073078736445, |
|
"eval_f1": 0.6207211544517277, |
|
"eval_loss": 1.9507993459701538, |
|
"eval_precision": 0.632026480012122, |
|
"eval_recall": 0.6445073078736445, |
|
"eval_runtime": 655.6116, |
|
"eval_samples_per_second": 6.47, |
|
"eval_steps_per_second": 0.81, |
|
"step": 49321 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"grad_norm": 0.31878066062927246, |
|
"learning_rate": 3.3326347409968915e-05, |
|
"loss": 0.322, |
|
"step": 50912 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"eval_accuracy": 0.6225836869401226, |
|
"eval_f1": 0.6098707323593034, |
|
"eval_loss": 2.1316964626312256, |
|
"eval_precision": 0.6276797078299916, |
|
"eval_recall": 0.6225836869401226, |
|
"eval_runtime": 655.8263, |
|
"eval_samples_per_second": 6.468, |
|
"eval_steps_per_second": 0.81, |
|
"step": 50912 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"grad_norm": 1.1452162265777588, |
|
"learning_rate": 3.263168465541933e-05, |
|
"loss": 0.3239, |
|
"step": 52503 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"eval_accuracy": 0.6508722300801508, |
|
"eval_f1": 0.6327759879930807, |
|
"eval_loss": 1.9785257577896118, |
|
"eval_precision": 0.6320825441180629, |
|
"eval_recall": 0.6508722300801508, |
|
"eval_runtime": 656.8912, |
|
"eval_samples_per_second": 6.458, |
|
"eval_steps_per_second": 0.808, |
|
"step": 52503 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"grad_norm": 18.911890029907227, |
|
"learning_rate": 3.193702190086975e-05, |
|
"loss": 0.301, |
|
"step": 54094 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"eval_accuracy": 0.6435643564356436, |
|
"eval_f1": 0.6097216315179437, |
|
"eval_loss": 2.2050163745880127, |
|
"eval_precision": 0.6258806132730886, |
|
"eval_recall": 0.6435643564356436, |
|
"eval_runtime": 659.2017, |
|
"eval_samples_per_second": 6.435, |
|
"eval_steps_per_second": 0.806, |
|
"step": 54094 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"grad_norm": 0.37402623891830444, |
|
"learning_rate": 3.124235914632016e-05, |
|
"loss": 0.28, |
|
"step": 55685 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"eval_accuracy": 0.6320132013201321, |
|
"eval_f1": 0.6173670056289928, |
|
"eval_loss": 2.2267725467681885, |
|
"eval_precision": 0.6318691154473476, |
|
"eval_recall": 0.6320132013201321, |
|
"eval_runtime": 655.5706, |
|
"eval_samples_per_second": 6.471, |
|
"eval_steps_per_second": 0.81, |
|
"step": 55685 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"grad_norm": 0.24996362626552582, |
|
"learning_rate": 3.054769639177059e-05, |
|
"loss": 0.2742, |
|
"step": 57276 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"eval_accuracy": 0.641914191419142, |
|
"eval_f1": 0.6158490856542709, |
|
"eval_loss": 2.3538448810577393, |
|
"eval_precision": 0.6239469548156946, |
|
"eval_recall": 0.641914191419142, |
|
"eval_runtime": 658.9065, |
|
"eval_samples_per_second": 6.438, |
|
"eval_steps_per_second": 0.806, |
|
"step": 57276 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"grad_norm": 40.534271240234375, |
|
"learning_rate": 2.9853033637221e-05, |
|
"loss": 0.2433, |
|
"step": 58867 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"eval_accuracy": 0.6478076379066479, |
|
"eval_f1": 0.6184054664095173, |
|
"eval_loss": 2.3947157859802246, |
|
"eval_precision": 0.6237019229335129, |
|
"eval_recall": 0.6478076379066479, |
|
"eval_runtime": 658.8157, |
|
"eval_samples_per_second": 6.439, |
|
"eval_steps_per_second": 0.806, |
|
"step": 58867 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"grad_norm": 0.18938298523426056, |
|
"learning_rate": 2.915837088267142e-05, |
|
"loss": 0.2677, |
|
"step": 60458 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"eval_accuracy": 0.6454502593116455, |
|
"eval_f1": 0.6234092398599342, |
|
"eval_loss": 2.400697708129883, |
|
"eval_precision": 0.6284538940001326, |
|
"eval_recall": 0.6454502593116455, |
|
"eval_runtime": 672.9538, |
|
"eval_samples_per_second": 6.304, |
|
"eval_steps_per_second": 0.789, |
|
"step": 60458 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"grad_norm": 0.05054619163274765, |
|
"learning_rate": 2.8463708128121836e-05, |
|
"loss": 0.2316, |
|
"step": 62049 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"eval_accuracy": 0.6296558227251297, |
|
"eval_f1": 0.6119646490374726, |
|
"eval_loss": 2.5197205543518066, |
|
"eval_precision": 0.6245868321675843, |
|
"eval_recall": 0.6296558227251297, |
|
"eval_runtime": 677.5713, |
|
"eval_samples_per_second": 6.261, |
|
"eval_steps_per_second": 0.784, |
|
"step": 62049 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"grad_norm": 0.9518815875053406, |
|
"learning_rate": 2.7769045373572254e-05, |
|
"loss": 0.2229, |
|
"step": 63640 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"eval_accuracy": 0.6506364922206507, |
|
"eval_f1": 0.6235476374240025, |
|
"eval_loss": 2.547842264175415, |
|
"eval_precision": 0.6321911444043747, |
|
"eval_recall": 0.6506364922206507, |
|
"eval_runtime": 680.5865, |
|
"eval_samples_per_second": 6.233, |
|
"eval_steps_per_second": 0.78, |
|
"step": 63640 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"grad_norm": 0.030849022790789604, |
|
"learning_rate": 2.707438261902267e-05, |
|
"loss": 0.215, |
|
"step": 65231 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"eval_accuracy": 0.6445073078736445, |
|
"eval_f1": 0.6209448574869749, |
|
"eval_loss": 2.516798734664917, |
|
"eval_precision": 0.6454990917874304, |
|
"eval_recall": 0.6445073078736445, |
|
"eval_runtime": 663.9051, |
|
"eval_samples_per_second": 6.389, |
|
"eval_steps_per_second": 0.8, |
|
"step": 65231 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"grad_norm": 0.38587260246276855, |
|
"learning_rate": 2.637971986447309e-05, |
|
"loss": 0.2032, |
|
"step": 66822 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"eval_accuracy": 0.6442715700141443, |
|
"eval_f1": 0.6161042310233847, |
|
"eval_loss": 2.6606993675231934, |
|
"eval_precision": 0.6304245945338769, |
|
"eval_recall": 0.6442715700141443, |
|
"eval_runtime": 670.0548, |
|
"eval_samples_per_second": 6.331, |
|
"eval_steps_per_second": 0.792, |
|
"step": 66822 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"grad_norm": 52.90748977661133, |
|
"learning_rate": 2.5685057109923506e-05, |
|
"loss": 0.1957, |
|
"step": 68413 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"eval_accuracy": 0.6218764733616219, |
|
"eval_f1": 0.6058915424282973, |
|
"eval_loss": 2.6433801651000977, |
|
"eval_precision": 0.6206059185377255, |
|
"eval_recall": 0.6218764733616219, |
|
"eval_runtime": 666.3612, |
|
"eval_samples_per_second": 6.366, |
|
"eval_steps_per_second": 0.797, |
|
"step": 68413 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"grad_norm": 0.5584044456481934, |
|
"learning_rate": 2.499039435537392e-05, |
|
"loss": 0.1839, |
|
"step": 70004 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"eval_accuracy": 0.648043375766148, |
|
"eval_f1": 0.6202014579788635, |
|
"eval_loss": 2.637795925140381, |
|
"eval_precision": 0.6181521426299653, |
|
"eval_recall": 0.648043375766148, |
|
"eval_runtime": 669.3865, |
|
"eval_samples_per_second": 6.337, |
|
"eval_steps_per_second": 0.793, |
|
"step": 70004 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"grad_norm": 0.02387963980436325, |
|
"learning_rate": 2.429573160082434e-05, |
|
"loss": 0.1672, |
|
"step": 71595 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"eval_accuracy": 0.632956152758133, |
|
"eval_f1": 0.6095258779451574, |
|
"eval_loss": 2.8354904651641846, |
|
"eval_precision": 0.6175432549732301, |
|
"eval_recall": 0.632956152758133, |
|
"eval_runtime": 658.5104, |
|
"eval_samples_per_second": 6.442, |
|
"eval_steps_per_second": 0.806, |
|
"step": 71595 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"grad_norm": 0.04022861644625664, |
|
"learning_rate": 2.3601068846274757e-05, |
|
"loss": 0.1554, |
|
"step": 73186 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"eval_accuracy": 0.6296558227251297, |
|
"eval_f1": 0.609015505263741, |
|
"eval_loss": 2.8833281993865967, |
|
"eval_precision": 0.6179849508221367, |
|
"eval_recall": 0.6296558227251297, |
|
"eval_runtime": 666.4351, |
|
"eval_samples_per_second": 6.365, |
|
"eval_steps_per_second": 0.797, |
|
"step": 73186 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"grad_norm": 3.8721201419830322, |
|
"learning_rate": 2.2906406091725175e-05, |
|
"loss": 0.1525, |
|
"step": 74777 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"eval_accuracy": 0.6499292786421499, |
|
"eval_f1": 0.6246638391623945, |
|
"eval_loss": 2.8732240200042725, |
|
"eval_precision": 0.6212359780695843, |
|
"eval_recall": 0.6499292786421499, |
|
"eval_runtime": 672.5613, |
|
"eval_samples_per_second": 6.307, |
|
"eval_steps_per_second": 0.79, |
|
"step": 74777 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"grad_norm": 0.009451803751289845, |
|
"learning_rate": 2.221174333717559e-05, |
|
"loss": 0.1443, |
|
"step": 76368 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"eval_accuracy": 0.6513437057991514, |
|
"eval_f1": 0.6297491645477588, |
|
"eval_loss": 2.7935521602630615, |
|
"eval_precision": 0.6239725322581114, |
|
"eval_recall": 0.6513437057991514, |
|
"eval_runtime": 673.447, |
|
"eval_samples_per_second": 6.299, |
|
"eval_steps_per_second": 0.788, |
|
"step": 76368 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"grad_norm": 0.0011284707579761744, |
|
"learning_rate": 2.151708058262601e-05, |
|
"loss": 0.1361, |
|
"step": 77959 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"eval_accuracy": 0.6442715700141443, |
|
"eval_f1": 0.6229754386330917, |
|
"eval_loss": 2.8814539909362793, |
|
"eval_precision": 0.6187358231984663, |
|
"eval_recall": 0.6442715700141443, |
|
"eval_runtime": 674.3209, |
|
"eval_samples_per_second": 6.291, |
|
"eval_steps_per_second": 0.787, |
|
"step": 77959 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"grad_norm": 102.50770568847656, |
|
"learning_rate": 2.0822417828076427e-05, |
|
"loss": 0.1351, |
|
"step": 79550 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_f1": 0.6174587269654099, |
|
"eval_loss": 3.070270299911499, |
|
"eval_precision": 0.6243702935522684, |
|
"eval_recall": 0.6428571428571429, |
|
"eval_runtime": 669.5589, |
|
"eval_samples_per_second": 6.336, |
|
"eval_steps_per_second": 0.793, |
|
"step": 79550 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"grad_norm": 8.653002738952637, |
|
"learning_rate": 2.0127755073526845e-05, |
|
"loss": 0.1196, |
|
"step": 81141 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"eval_accuracy": 0.6423856671381424, |
|
"eval_f1": 0.6190424793456692, |
|
"eval_loss": 3.027528762817383, |
|
"eval_precision": 0.625005878732158, |
|
"eval_recall": 0.6423856671381424, |
|
"eval_runtime": 677.2082, |
|
"eval_samples_per_second": 6.264, |
|
"eval_steps_per_second": 0.784, |
|
"step": 81141 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"grad_norm": 62.25484848022461, |
|
"learning_rate": 1.943309231897726e-05, |
|
"loss": 0.111, |
|
"step": 82732 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"eval_accuracy": 0.641914191419142, |
|
"eval_f1": 0.618920231303522, |
|
"eval_loss": 3.1254563331604004, |
|
"eval_precision": 0.6280670220001587, |
|
"eval_recall": 0.641914191419142, |
|
"eval_runtime": 651.0502, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 0.816, |
|
"step": 82732 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"grad_norm": 18.133270263671875, |
|
"learning_rate": 1.8738429564427678e-05, |
|
"loss": 0.1119, |
|
"step": 84323 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"eval_accuracy": 0.6471004243281471, |
|
"eval_f1": 0.6214547873156199, |
|
"eval_loss": 3.1854350566864014, |
|
"eval_precision": 0.629875200916356, |
|
"eval_recall": 0.6471004243281471, |
|
"eval_runtime": 653.5, |
|
"eval_samples_per_second": 6.491, |
|
"eval_steps_per_second": 0.813, |
|
"step": 84323 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"grad_norm": 19.144214630126953, |
|
"learning_rate": 1.8043766809878096e-05, |
|
"loss": 0.1069, |
|
"step": 85914 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"eval_accuracy": 0.6383781235266384, |
|
"eval_f1": 0.6195011414926834, |
|
"eval_loss": 3.2136049270629883, |
|
"eval_precision": 0.6251413165179668, |
|
"eval_recall": 0.6383781235266384, |
|
"eval_runtime": 653.7257, |
|
"eval_samples_per_second": 6.489, |
|
"eval_steps_per_second": 0.812, |
|
"step": 85914 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"grad_norm": 0.010962074622511864, |
|
"learning_rate": 1.7349104055328515e-05, |
|
"loss": 0.093, |
|
"step": 87505 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"eval_accuracy": 0.6506364922206507, |
|
"eval_f1": 0.6154588049677499, |
|
"eval_loss": 3.3124778270721436, |
|
"eval_precision": 0.614483037031078, |
|
"eval_recall": 0.6506364922206507, |
|
"eval_runtime": 653.1839, |
|
"eval_samples_per_second": 6.494, |
|
"eval_steps_per_second": 0.813, |
|
"step": 87505 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"grad_norm": 4.032207489013672, |
|
"learning_rate": 1.665444130077893e-05, |
|
"loss": 0.0901, |
|
"step": 89096 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"eval_accuracy": 0.6383781235266384, |
|
"eval_f1": 0.6217233442126047, |
|
"eval_loss": 3.3028151988983154, |
|
"eval_precision": 0.6277366002975752, |
|
"eval_recall": 0.6383781235266384, |
|
"eval_runtime": 651.3473, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 0.815, |
|
"step": 89096 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"grad_norm": 0.039573218673467636, |
|
"learning_rate": 1.5959778546229348e-05, |
|
"loss": 0.0776, |
|
"step": 90687 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"eval_accuracy": 0.6487505893446488, |
|
"eval_f1": 0.6297653212708532, |
|
"eval_loss": 3.3314833641052246, |
|
"eval_precision": 0.6272429266150353, |
|
"eval_recall": 0.6487505893446488, |
|
"eval_runtime": 649.3376, |
|
"eval_samples_per_second": 6.533, |
|
"eval_steps_per_second": 0.818, |
|
"step": 90687 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"grad_norm": 0.0005812808522023261, |
|
"learning_rate": 1.5265115791679766e-05, |
|
"loss": 0.0837, |
|
"step": 92278 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"eval_accuracy": 0.6558227251296558, |
|
"eval_f1": 0.6242411319775614, |
|
"eval_loss": 3.438481092453003, |
|
"eval_precision": 0.6374388645077231, |
|
"eval_recall": 0.6558227251296558, |
|
"eval_runtime": 658.6971, |
|
"eval_samples_per_second": 6.44, |
|
"eval_steps_per_second": 0.806, |
|
"step": 92278 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"grad_norm": 0.04262514412403107, |
|
"learning_rate": 1.4570453037130182e-05, |
|
"loss": 0.0701, |
|
"step": 93869 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"eval_accuracy": 0.644035832154644, |
|
"eval_f1": 0.6285778416417378, |
|
"eval_loss": 3.3799736499786377, |
|
"eval_precision": 0.6320916006510833, |
|
"eval_recall": 0.644035832154644, |
|
"eval_runtime": 669.2439, |
|
"eval_samples_per_second": 6.338, |
|
"eval_steps_per_second": 0.793, |
|
"step": 93869 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"grad_norm": 0.001757206628099084, |
|
"learning_rate": 1.38757902825806e-05, |
|
"loss": 0.0682, |
|
"step": 95460 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"eval_accuracy": 0.6541725601131542, |
|
"eval_f1": 0.6262011153983429, |
|
"eval_loss": 3.4473154544830322, |
|
"eval_precision": 0.6343707774148433, |
|
"eval_recall": 0.6541725601131542, |
|
"eval_runtime": 660.2778, |
|
"eval_samples_per_second": 6.425, |
|
"eval_steps_per_second": 0.804, |
|
"step": 95460 |
|
}, |
|
{ |
|
"epoch": 60.01, |
|
"grad_norm": 0.08907134085893631, |
|
"learning_rate": 1.3181127528031017e-05, |
|
"loss": 0.0763, |
|
"step": 97051 |
|
}, |
|
{ |
|
"epoch": 60.01, |
|
"eval_accuracy": 0.6315417256011315, |
|
"eval_f1": 0.6148352414634933, |
|
"eval_loss": 3.450514316558838, |
|
"eval_precision": 0.6148635373339948, |
|
"eval_recall": 0.6315417256011315, |
|
"eval_runtime": 666.8615, |
|
"eval_samples_per_second": 6.361, |
|
"eval_steps_per_second": 0.796, |
|
"step": 97051 |
|
}, |
|
{ |
|
"epoch": 61.01, |
|
"grad_norm": 0.0005003924597986042, |
|
"learning_rate": 1.2486464773481436e-05, |
|
"loss": 0.0629, |
|
"step": 98642 |
|
}, |
|
{ |
|
"epoch": 61.01, |
|
"eval_accuracy": 0.6504007543611504, |
|
"eval_f1": 0.6253235814801552, |
|
"eval_loss": 3.440239906311035, |
|
"eval_precision": 0.6233462363989224, |
|
"eval_recall": 0.6504007543611504, |
|
"eval_runtime": 653.614, |
|
"eval_samples_per_second": 6.49, |
|
"eval_steps_per_second": 0.812, |
|
"step": 98642 |
|
}, |
|
{ |
|
"epoch": 62.01, |
|
"grad_norm": 0.06604283303022385, |
|
"learning_rate": 1.1791802018931852e-05, |
|
"loss": 0.0552, |
|
"step": 100233 |
|
}, |
|
{ |
|
"epoch": 62.01, |
|
"eval_accuracy": 0.6537010843941538, |
|
"eval_f1": 0.6314537569564033, |
|
"eval_loss": 3.4401602745056152, |
|
"eval_precision": 0.6324239134320033, |
|
"eval_recall": 0.6537010843941538, |
|
"eval_runtime": 663.5087, |
|
"eval_samples_per_second": 6.393, |
|
"eval_steps_per_second": 0.8, |
|
"step": 100233 |
|
}, |
|
{ |
|
"epoch": 63.01, |
|
"grad_norm": 0.00693962536752224, |
|
"learning_rate": 1.109713926438227e-05, |
|
"loss": 0.0463, |
|
"step": 101824 |
|
}, |
|
{ |
|
"epoch": 63.01, |
|
"eval_accuracy": 0.6466289486091467, |
|
"eval_f1": 0.6216674773439607, |
|
"eval_loss": 3.529994010925293, |
|
"eval_precision": 0.6217012879573426, |
|
"eval_recall": 0.6466289486091467, |
|
"eval_runtime": 686.7487, |
|
"eval_samples_per_second": 6.177, |
|
"eval_steps_per_second": 0.773, |
|
"step": 101824 |
|
}, |
|
{ |
|
"epoch": 64.01, |
|
"grad_norm": 0.0005720761837437749, |
|
"learning_rate": 1.0402476509832687e-05, |
|
"loss": 0.0471, |
|
"step": 103415 |
|
}, |
|
{ |
|
"epoch": 64.01, |
|
"eval_accuracy": 0.6511079679396511, |
|
"eval_f1": 0.6222590555700158, |
|
"eval_loss": 3.6793229579925537, |
|
"eval_precision": 0.6346230225230813, |
|
"eval_recall": 0.6511079679396511, |
|
"eval_runtime": 669.336, |
|
"eval_samples_per_second": 6.338, |
|
"eval_steps_per_second": 0.793, |
|
"step": 103415 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"grad_norm": 0.0010839367751032114, |
|
"learning_rate": 9.707813755283105e-06, |
|
"loss": 0.0448, |
|
"step": 105006 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"eval_accuracy": 0.6449787835926449, |
|
"eval_f1": 0.6169893577308092, |
|
"eval_loss": 3.685042142868042, |
|
"eval_precision": 0.6265011843748146, |
|
"eval_recall": 0.6449787835926449, |
|
"eval_runtime": 670.2367, |
|
"eval_samples_per_second": 6.329, |
|
"eval_steps_per_second": 0.792, |
|
"step": 105006 |
|
}, |
|
{ |
|
"epoch": 66.01, |
|
"grad_norm": 0.03215405344963074, |
|
"learning_rate": 9.013151000733522e-06, |
|
"loss": 0.0362, |
|
"step": 106597 |
|
}, |
|
{ |
|
"epoch": 66.01, |
|
"eval_accuracy": 0.6482791136256483, |
|
"eval_f1": 0.624213528202086, |
|
"eval_loss": 3.658543348312378, |
|
"eval_precision": 0.6265115566919579, |
|
"eval_recall": 0.6482791136256483, |
|
"eval_runtime": 655.7488, |
|
"eval_samples_per_second": 6.469, |
|
"eval_steps_per_second": 0.81, |
|
"step": 106597 |
|
}, |
|
{ |
|
"epoch": 67.01, |
|
"grad_norm": 0.00014659887528978288, |
|
"learning_rate": 8.31848824618394e-06, |
|
"loss": 0.0419, |
|
"step": 108188 |
|
}, |
|
{ |
|
"epoch": 67.01, |
|
"eval_accuracy": 0.6343705799151343, |
|
"eval_f1": 0.6168968855551049, |
|
"eval_loss": 3.6284878253936768, |
|
"eval_precision": 0.6192216504465796, |
|
"eval_recall": 0.6343705799151343, |
|
"eval_runtime": 667.7711, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 0.795, |
|
"step": 108188 |
|
}, |
|
{ |
|
"epoch": 68.01, |
|
"grad_norm": 0.0006045685149729252, |
|
"learning_rate": 7.6238254916343565e-06, |
|
"loss": 0.0309, |
|
"step": 109779 |
|
}, |
|
{ |
|
"epoch": 68.01, |
|
"eval_accuracy": 0.648986327204149, |
|
"eval_f1": 0.6269316812634602, |
|
"eval_loss": 3.665743112564087, |
|
"eval_precision": 0.6264133629902356, |
|
"eval_recall": 0.648986327204149, |
|
"eval_runtime": 670.5591, |
|
"eval_samples_per_second": 6.326, |
|
"eval_steps_per_second": 0.792, |
|
"step": 109779 |
|
}, |
|
{ |
|
"epoch": 69.01, |
|
"grad_norm": 0.0056765577755868435, |
|
"learning_rate": 6.929162737084774e-06, |
|
"loss": 0.0312, |
|
"step": 111370 |
|
}, |
|
{ |
|
"epoch": 69.01, |
|
"eval_accuracy": 0.6416784535596417, |
|
"eval_f1": 0.6205254067422245, |
|
"eval_loss": 3.7122817039489746, |
|
"eval_precision": 0.6239256088005212, |
|
"eval_recall": 0.6416784535596417, |
|
"eval_runtime": 661.8739, |
|
"eval_samples_per_second": 6.409, |
|
"eval_steps_per_second": 0.802, |
|
"step": 111370 |
|
}, |
|
{ |
|
"epoch": 70.01, |
|
"grad_norm": 0.0024060504510998726, |
|
"learning_rate": 6.234499982535192e-06, |
|
"loss": 0.0315, |
|
"step": 112961 |
|
}, |
|
{ |
|
"epoch": 70.01, |
|
"eval_accuracy": 0.648986327204149, |
|
"eval_f1": 0.6189187635444573, |
|
"eval_loss": 3.753802537918091, |
|
"eval_precision": 0.6224224422132874, |
|
"eval_recall": 0.648986327204149, |
|
"eval_runtime": 664.7996, |
|
"eval_samples_per_second": 6.381, |
|
"eval_steps_per_second": 0.799, |
|
"step": 112961 |
|
}, |
|
{ |
|
"epoch": 71.01, |
|
"grad_norm": 0.047281160950660706, |
|
"learning_rate": 5.53983722798561e-06, |
|
"loss": 0.0294, |
|
"step": 114552 |
|
}, |
|
{ |
|
"epoch": 71.01, |
|
"eval_accuracy": 0.6482791136256483, |
|
"eval_f1": 0.6236630466746416, |
|
"eval_loss": 3.706387519836426, |
|
"eval_precision": 0.6234001348683471, |
|
"eval_recall": 0.6482791136256483, |
|
"eval_runtime": 677.1081, |
|
"eval_samples_per_second": 6.265, |
|
"eval_steps_per_second": 0.784, |
|
"step": 114552 |
|
}, |
|
{ |
|
"epoch": 72.01, |
|
"grad_norm": 0.00014872274186927825, |
|
"learning_rate": 4.845174473436027e-06, |
|
"loss": 0.0282, |
|
"step": 116143 |
|
}, |
|
{ |
|
"epoch": 72.01, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_f1": 0.6192426747671007, |
|
"eval_loss": 3.7945356369018555, |
|
"eval_precision": 0.624713412639683, |
|
"eval_recall": 0.6428571428571429, |
|
"eval_runtime": 677.029, |
|
"eval_samples_per_second": 6.266, |
|
"eval_steps_per_second": 0.784, |
|
"step": 116143 |
|
}, |
|
{ |
|
"epoch": 73.01, |
|
"grad_norm": 0.0006783913122490048, |
|
"learning_rate": 4.150511718886444e-06, |
|
"loss": 0.0275, |
|
"step": 117734 |
|
}, |
|
{ |
|
"epoch": 73.01, |
|
"eval_accuracy": 0.6527581329561527, |
|
"eval_f1": 0.6271859964586763, |
|
"eval_loss": 3.7549855709075928, |
|
"eval_precision": 0.6297463780952456, |
|
"eval_recall": 0.6527581329561527, |
|
"eval_runtime": 694.5725, |
|
"eval_samples_per_second": 6.107, |
|
"eval_steps_per_second": 0.764, |
|
"step": 117734 |
|
}, |
|
{ |
|
"epoch": 74.01, |
|
"grad_norm": 0.10241026431322098, |
|
"learning_rate": 3.4558489643368614e-06, |
|
"loss": 0.0319, |
|
"step": 119325 |
|
}, |
|
{ |
|
"epoch": 74.01, |
|
"eval_accuracy": 0.6508722300801508, |
|
"eval_f1": 0.6233733427088337, |
|
"eval_loss": 3.740715980529785, |
|
"eval_precision": 0.628877991091339, |
|
"eval_recall": 0.6508722300801508, |
|
"eval_runtime": 654.6009, |
|
"eval_samples_per_second": 6.48, |
|
"eval_steps_per_second": 0.811, |
|
"step": 119325 |
|
}, |
|
{ |
|
"epoch": 75.01, |
|
"grad_norm": 0.00016077565669547766, |
|
"learning_rate": 2.7611862097872788e-06, |
|
"loss": 0.021, |
|
"step": 120916 |
|
}, |
|
{ |
|
"epoch": 75.01, |
|
"eval_accuracy": 0.6532296086751532, |
|
"eval_f1": 0.6269729664014984, |
|
"eval_loss": 3.75272536277771, |
|
"eval_precision": 0.6290218177795819, |
|
"eval_recall": 0.6532296086751532, |
|
"eval_runtime": 658.8706, |
|
"eval_samples_per_second": 6.438, |
|
"eval_steps_per_second": 0.806, |
|
"step": 120916 |
|
}, |
|
{ |
|
"epoch": 76.01, |
|
"grad_norm": 0.0008236940484493971, |
|
"learning_rate": 2.066523455237696e-06, |
|
"loss": 0.0159, |
|
"step": 122507 |
|
}, |
|
{ |
|
"epoch": 76.01, |
|
"eval_accuracy": 0.6515794436586516, |
|
"eval_f1": 0.6242553810002985, |
|
"eval_loss": 3.7779977321624756, |
|
"eval_precision": 0.6240515257248215, |
|
"eval_recall": 0.6515794436586516, |
|
"eval_runtime": 660.4492, |
|
"eval_samples_per_second": 6.423, |
|
"eval_steps_per_second": 0.804, |
|
"step": 122507 |
|
}, |
|
{ |
|
"epoch": 77.01, |
|
"grad_norm": 0.004408856853842735, |
|
"learning_rate": 1.3718607006881136e-06, |
|
"loss": 0.0133, |
|
"step": 124098 |
|
}, |
|
{ |
|
"epoch": 77.01, |
|
"eval_accuracy": 0.6499292786421499, |
|
"eval_f1": 0.624040880732074, |
|
"eval_loss": 3.7923333644866943, |
|
"eval_precision": 0.6271592552161068, |
|
"eval_recall": 0.6499292786421499, |
|
"eval_runtime": 666.8007, |
|
"eval_samples_per_second": 6.362, |
|
"eval_steps_per_second": 0.796, |
|
"step": 124098 |
|
}, |
|
{ |
|
"epoch": 78.01, |
|
"grad_norm": 0.0014601564034819603, |
|
"learning_rate": 6.771979461385309e-07, |
|
"loss": 0.0125, |
|
"step": 125689 |
|
}, |
|
{ |
|
"epoch": 78.01, |
|
"eval_accuracy": 0.6504007543611504, |
|
"eval_f1": 0.6216504935099332, |
|
"eval_loss": 3.8070006370544434, |
|
"eval_precision": 0.6262856082095383, |
|
"eval_recall": 0.6504007543611504, |
|
"eval_runtime": 667.8654, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 0.795, |
|
"step": 125689 |
|
}, |
|
{ |
|
"epoch": 79.01, |
|
"grad_norm": 0.0005495115183293819, |
|
"learning_rate": 0.0, |
|
"loss": 0.0132, |
|
"step": 127240 |
|
}, |
|
{ |
|
"epoch": 79.01, |
|
"eval_accuracy": 0.6506364922206507, |
|
"eval_f1": 0.6225468142229464, |
|
"eval_loss": 3.796358108520508, |
|
"eval_precision": 0.6264178866401664, |
|
"eval_recall": 0.6506364922206507, |
|
"eval_runtime": 698.0179, |
|
"eval_samples_per_second": 6.077, |
|
"eval_steps_per_second": 0.761, |
|
"step": 127240 |
|
}, |
|
{ |
|
"epoch": 79.01, |
|
"step": 127240, |
|
"total_flos": 1.2682038581932563e+21, |
|
"train_loss": 0.41676221998635044, |
|
"train_runtime": 266875.9954, |
|
"train_samples_per_second": 3.814, |
|
"train_steps_per_second": 0.477 |
|
}, |
|
{ |
|
"epoch": 79.01, |
|
"eval_accuracy": 0.6697312588401697, |
|
"eval_f1": 0.635434606603408, |
|
"eval_loss": 1.1357100009918213, |
|
"eval_precision": 0.642912037609069, |
|
"eval_recall": 0.6697312588401697, |
|
"eval_runtime": 859.5426, |
|
"eval_samples_per_second": 4.935, |
|
"eval_steps_per_second": 0.618, |
|
"step": 127240 |
|
}, |
|
{ |
|
"epoch": 79.01, |
|
"eval_accuracy": 0.6697312588401697, |
|
"eval_f1": 0.635434606603408, |
|
"eval_loss": 1.1357100009918213, |
|
"eval_precision": 0.642912037609069, |
|
"eval_recall": 0.6697312588401697, |
|
"eval_runtime": 860.1584, |
|
"eval_samples_per_second": 4.932, |
|
"eval_steps_per_second": 0.617, |
|
"step": 127240 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 127240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 1.2682038581932563e+21, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|