AnnantJain's picture
Upload folder using huggingface_hub
4caf23d verified
{
"best_metric": 0.8376479645847368,
"best_model_checkpoint": "./XLMR-large2-multi-109k-multi-outputs/checkpoint-40000",
"epoch": 7.893792608539648,
"eval_steps": 1000,
"global_step": 44000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17940437746681018,
"grad_norm": 6.077027320861816,
"learning_rate": 2.242152466367713e-07,
"loss": 0.7043,
"step": 1000
},
{
"epoch": 0.17940437746681018,
"eval_accuracy": 0.5152891310929458,
"eval_f1": 0.4964881014781424,
"eval_loss": 0.6920226216316223,
"eval_precision": 0.522852726871274,
"eval_recall": 0.4726546906187625,
"eval_runtime": 103.5147,
"eval_samples_per_second": 95.726,
"eval_steps_per_second": 5.989,
"step": 1000
},
{
"epoch": 0.35880875493362036,
"grad_norm": 7.982357501983643,
"learning_rate": 4.484304932735426e-07,
"loss": 0.6972,
"step": 2000
},
{
"epoch": 0.35880875493362036,
"eval_accuracy": 0.5537390251286709,
"eval_f1": 0.514065934065934,
"eval_loss": 0.6867982745170593,
"eval_precision": 0.571882640586797,
"eval_recall": 0.4668662674650699,
"eval_runtime": 103.803,
"eval_samples_per_second": 95.46,
"eval_steps_per_second": 5.973,
"step": 2000
},
{
"epoch": 0.5382131324004306,
"grad_norm": 10.05902099609375,
"learning_rate": 6.72645739910314e-07,
"loss": 0.6892,
"step": 3000
},
{
"epoch": 0.5382131324004306,
"eval_accuracy": 0.5728125946109597,
"eval_f1": 0.44280637093589575,
"eval_loss": 0.6743206977844238,
"eval_precision": 0.6501739466563587,
"eval_recall": 0.33572854291417165,
"eval_runtime": 103.5733,
"eval_samples_per_second": 95.671,
"eval_steps_per_second": 5.986,
"step": 3000
},
{
"epoch": 0.7176175098672407,
"grad_norm": 17.559974670410156,
"learning_rate": 8.968609865470852e-07,
"loss": 0.6686,
"step": 4000
},
{
"epoch": 0.7176175098672407,
"eval_accuracy": 0.620345140781108,
"eval_f1": 0.6229705351773902,
"eval_loss": 0.6282544136047363,
"eval_precision": 0.6256038647342995,
"eval_recall": 0.6203592814371257,
"eval_runtime": 103.1262,
"eval_samples_per_second": 96.086,
"eval_steps_per_second": 6.012,
"step": 4000
},
{
"epoch": 0.897021887334051,
"grad_norm": 17.958587646484375,
"learning_rate": 9.865444034685537e-07,
"loss": 0.6251,
"step": 5000
},
{
"epoch": 0.897021887334051,
"eval_accuracy": 0.6844282975073166,
"eval_f1": 0.671568112593215,
"eval_loss": 0.5713071227073669,
"eval_precision": 0.7087120372422966,
"eval_recall": 0.63812375249501,
"eval_runtime": 103.2452,
"eval_samples_per_second": 95.975,
"eval_steps_per_second": 6.005,
"step": 5000
},
{
"epoch": 1.0764262648008611,
"grad_norm": 21.860984802246094,
"learning_rate": 9.616266321140236e-07,
"loss": 0.559,
"step": 6000
},
{
"epoch": 1.0764262648008611,
"eval_accuracy": 0.7250983953981229,
"eval_f1": 0.6946188340807175,
"eval_loss": 0.48871228098869324,
"eval_precision": 0.7923273657289003,
"eval_recall": 0.6183632734530938,
"eval_runtime": 102.8682,
"eval_samples_per_second": 96.327,
"eval_steps_per_second": 6.027,
"step": 6000
},
{
"epoch": 1.2558306422676713,
"grad_norm": 144.90872192382812,
"learning_rate": 9.367088607594936e-07,
"loss": 0.4901,
"step": 7000
},
{
"epoch": 1.2558306422676713,
"eval_accuracy": 0.7360984963164799,
"eval_f1": 0.7147998691242229,
"eval_loss": 0.4424116909503937,
"eval_precision": 0.7879297908150997,
"eval_recall": 0.6540918163672654,
"eval_runtime": 103.0989,
"eval_samples_per_second": 96.112,
"eval_steps_per_second": 6.014,
"step": 7000
},
{
"epoch": 1.4352350197344816,
"grad_norm": 23.610597610473633,
"learning_rate": 9.117910894049637e-07,
"loss": 0.4595,
"step": 8000
},
{
"epoch": 1.4352350197344816,
"eval_accuracy": 0.7460894136643456,
"eval_f1": 0.7595106098260371,
"eval_loss": 0.4278419017791748,
"eval_precision": 0.7287234042553191,
"eval_recall": 0.7930139720558882,
"eval_runtime": 103.2272,
"eval_samples_per_second": 95.992,
"eval_steps_per_second": 6.006,
"step": 8000
},
{
"epoch": 1.6146393972012918,
"grad_norm": 13.438475608825684,
"learning_rate": 8.868733180504335e-07,
"loss": 0.4431,
"step": 9000
},
{
"epoch": 1.6146393972012918,
"eval_accuracy": 0.7517408416590978,
"eval_f1": 0.7542457542457542,
"eval_loss": 0.4201831519603729,
"eval_precision": 0.755,
"eval_recall": 0.7534930139720559,
"eval_runtime": 103.1397,
"eval_samples_per_second": 96.074,
"eval_steps_per_second": 6.011,
"step": 9000
},
{
"epoch": 1.794043774668102,
"grad_norm": 59.678855895996094,
"learning_rate": 8.619555466959035e-07,
"loss": 0.4246,
"step": 10000
},
{
"epoch": 1.794043774668102,
"eval_accuracy": 0.7612271672217177,
"eval_f1": 0.795611610228058,
"eval_loss": 0.4052415192127228,
"eval_precision": 0.7013402375875724,
"eval_recall": 0.9191616766467066,
"eval_runtime": 103.3719,
"eval_samples_per_second": 95.858,
"eval_steps_per_second": 5.998,
"step": 10000
},
{
"epoch": 1.973448152134912,
"grad_norm": 56.009273529052734,
"learning_rate": 8.370377753413735e-07,
"loss": 0.4168,
"step": 11000
},
{
"epoch": 1.973448152134912,
"eval_accuracy": 0.7611262488646685,
"eval_f1": 0.7569565663825855,
"eval_loss": 0.39738962054252625,
"eval_precision": 0.7794459716641996,
"eval_recall": 0.7357285429141717,
"eval_runtime": 103.343,
"eval_samples_per_second": 95.885,
"eval_steps_per_second": 5.999,
"step": 11000
},
{
"epoch": 2.1528525296017222,
"grad_norm": 48.44904708862305,
"learning_rate": 8.121200039868433e-07,
"loss": 0.4074,
"step": 12000
},
{
"epoch": 2.1528525296017222,
"eval_accuracy": 0.7603189020082753,
"eval_f1": 0.7932445373030382,
"eval_loss": 0.42876219749450684,
"eval_precision": 0.7034120734908137,
"eval_recall": 0.9093812375249501,
"eval_runtime": 103.2666,
"eval_samples_per_second": 95.955,
"eval_steps_per_second": 6.004,
"step": 12000
},
{
"epoch": 2.3322569070685324,
"grad_norm": 7.191207408905029,
"learning_rate": 7.872022326323134e-07,
"loss": 0.398,
"step": 13000
},
{
"epoch": 2.3322569070685324,
"eval_accuracy": 0.7668785952164698,
"eval_f1": 0.7844747154319835,
"eval_loss": 0.39464080333709717,
"eval_precision": 0.7365101611772951,
"eval_recall": 0.8391217564870259,
"eval_runtime": 103.6045,
"eval_samples_per_second": 95.643,
"eval_steps_per_second": 5.984,
"step": 13000
},
{
"epoch": 2.5116612845353425,
"grad_norm": 8.779580116271973,
"learning_rate": 7.622844612777832e-07,
"loss": 0.4009,
"step": 14000
},
{
"epoch": 2.5116612845353425,
"eval_accuracy": 0.7699061459279443,
"eval_f1": 0.7972251867662753,
"eval_loss": 0.38235536217689514,
"eval_precision": 0.7189605389797883,
"eval_recall": 0.8946107784431138,
"eval_runtime": 103.5288,
"eval_samples_per_second": 95.713,
"eval_steps_per_second": 5.989,
"step": 14000
},
{
"epoch": 2.6910656620021527,
"grad_norm": 10.75382137298584,
"learning_rate": 7.373666899232532e-07,
"loss": 0.383,
"step": 15000
},
{
"epoch": 2.6910656620021527,
"eval_accuracy": 0.7800988999899081,
"eval_f1": 0.7934401365058299,
"eval_loss": 0.4023512005805969,
"eval_precision": 0.7555515435999278,
"eval_recall": 0.8353293413173652,
"eval_runtime": 103.2649,
"eval_samples_per_second": 95.957,
"eval_steps_per_second": 6.004,
"step": 15000
},
{
"epoch": 2.8704700394689633,
"grad_norm": 8.595725059509277,
"learning_rate": 7.124489185687232e-07,
"loss": 0.3869,
"step": 16000
},
{
"epoch": 2.8704700394689633,
"eval_accuracy": 0.7843374709859724,
"eval_f1": 0.7970753014908366,
"eval_loss": 0.3746848404407501,
"eval_precision": 0.7601883716717985,
"eval_recall": 0.8377245508982036,
"eval_runtime": 103.4123,
"eval_samples_per_second": 95.82,
"eval_steps_per_second": 5.995,
"step": 16000
},
{
"epoch": 3.0498744169357734,
"grad_norm": 30.062721252441406,
"learning_rate": 6.875311472141931e-07,
"loss": 0.3761,
"step": 17000
},
{
"epoch": 3.0498744169357734,
"eval_accuracy": 0.7884751236249874,
"eval_f1": 0.7946708463949843,
"eval_loss": 0.39211228489875793,
"eval_precision": 0.7803001154290111,
"eval_recall": 0.8095808383233533,
"eval_runtime": 103.6712,
"eval_samples_per_second": 95.581,
"eval_steps_per_second": 5.98,
"step": 17000
},
{
"epoch": 3.2292787944025836,
"grad_norm": 56.15926742553711,
"learning_rate": 6.62613375859663e-07,
"loss": 0.3609,
"step": 18000
},
{
"epoch": 3.2292787944025836,
"eval_accuracy": 0.784741144414169,
"eval_f1": 0.7985645481159694,
"eval_loss": 0.39061158895492554,
"eval_precision": 0.7578419071518193,
"eval_recall": 0.8439121756487026,
"eval_runtime": 103.0596,
"eval_samples_per_second": 96.148,
"eval_steps_per_second": 6.016,
"step": 18000
},
{
"epoch": 3.4086831718693937,
"grad_norm": 7.428126811981201,
"learning_rate": 6.376956045051331e-07,
"loss": 0.3535,
"step": 19000
},
{
"epoch": 3.4086831718693937,
"eval_accuracy": 0.7953375719043294,
"eval_f1": 0.8157368707977467,
"eval_loss": 0.3811704218387604,
"eval_precision": 0.7486657771847899,
"eval_recall": 0.8960079840319362,
"eval_runtime": 103.3599,
"eval_samples_per_second": 95.869,
"eval_steps_per_second": 5.998,
"step": 19000
},
{
"epoch": 3.588087549336204,
"grad_norm": 248.54281616210938,
"learning_rate": 6.127778331506029e-07,
"loss": 0.3497,
"step": 20000
},
{
"epoch": 3.588087549336204,
"eval_accuracy": 0.7977596124735089,
"eval_f1": 0.8121836925960637,
"eval_loss": 0.37064263224601746,
"eval_precision": 0.765547703180212,
"eval_recall": 0.8648702594810379,
"eval_runtime": 103.2293,
"eval_samples_per_second": 95.99,
"eval_steps_per_second": 6.006,
"step": 20000
},
{
"epoch": 3.767491926803014,
"grad_norm": 44.91804504394531,
"learning_rate": 5.87860061796073e-07,
"loss": 0.3543,
"step": 21000
},
{
"epoch": 3.767491926803014,
"eval_accuracy": 0.8025027752548188,
"eval_f1": 0.8141676953755579,
"eval_loss": 0.3442750871181488,
"eval_precision": 0.7764897663466763,
"eval_recall": 0.855688622754491,
"eval_runtime": 103.1871,
"eval_samples_per_second": 96.029,
"eval_steps_per_second": 6.009,
"step": 21000
},
{
"epoch": 3.946896304269824,
"grad_norm": 84.26334381103516,
"learning_rate": 5.629422904415428e-07,
"loss": 0.3425,
"step": 22000
},
{
"epoch": 3.946896304269824,
"eval_accuracy": 0.8035119588253103,
"eval_f1": 0.8079692277344905,
"eval_loss": 0.3556448519229889,
"eval_precision": 0.7985962175862741,
"eval_recall": 0.817564870259481,
"eval_runtime": 102.9714,
"eval_samples_per_second": 96.231,
"eval_steps_per_second": 6.021,
"step": 22000
},
{
"epoch": 4.126300681736635,
"grad_norm": 23.619245529174805,
"learning_rate": 5.380245190870128e-07,
"loss": 0.335,
"step": 23000
},
{
"epoch": 4.126300681736635,
"eval_accuracy": 0.8024018568977697,
"eval_f1": 0.8052903739061257,
"eval_loss": 0.3544567823410034,
"eval_precision": 0.8024177566389219,
"eval_recall": 0.808183632734531,
"eval_runtime": 102.8733,
"eval_samples_per_second": 96.322,
"eval_steps_per_second": 6.027,
"step": 23000
},
{
"epoch": 4.3057050592034445,
"grad_norm": 16.840389251708984,
"learning_rate": 5.131067477324828e-07,
"loss": 0.3222,
"step": 24000
},
{
"epoch": 4.3057050592034445,
"eval_accuracy": 0.8070441013220305,
"eval_f1": 0.8103550882761357,
"eval_loss": 0.349142849445343,
"eval_precision": 0.8054022082018928,
"eval_recall": 0.8153692614770459,
"eval_runtime": 103.0742,
"eval_samples_per_second": 96.135,
"eval_steps_per_second": 6.015,
"step": 24000
},
{
"epoch": 4.485109436670255,
"grad_norm": 49.83803939819336,
"learning_rate": 4.881889763779527e-07,
"loss": 0.3157,
"step": 25000
},
{
"epoch": 4.485109436670255,
"eval_accuracy": 0.8095670602482592,
"eval_f1": 0.8254232583957813,
"eval_loss": 0.357431560754776,
"eval_precision": 0.7692705638903259,
"eval_recall": 0.8904191616766467,
"eval_runtime": 103.3608,
"eval_samples_per_second": 95.868,
"eval_steps_per_second": 5.998,
"step": 25000
},
{
"epoch": 4.664513814137065,
"grad_norm": 134.8468475341797,
"learning_rate": 4.632712050234227e-07,
"loss": 0.3207,
"step": 26000
},
{
"epoch": 4.664513814137065,
"eval_accuracy": 0.8153194066000605,
"eval_f1": 0.8328156404165905,
"eval_loss": 0.34428831934928894,
"eval_precision": 0.7678571428571429,
"eval_recall": 0.9097804391217564,
"eval_runtime": 103.0601,
"eval_samples_per_second": 96.148,
"eval_steps_per_second": 6.016,
"step": 26000
},
{
"epoch": 4.843918191603875,
"grad_norm": 12.487037658691406,
"learning_rate": 4.3835343366889267e-07,
"loss": 0.3217,
"step": 27000
},
{
"epoch": 4.843918191603875,
"eval_accuracy": 0.8124936926026844,
"eval_f1": 0.8151611619578193,
"eval_loss": 0.3367626368999481,
"eval_precision": 0.8125743752479175,
"eval_recall": 0.8177644710578842,
"eval_runtime": 103.075,
"eval_samples_per_second": 96.134,
"eval_steps_per_second": 6.015,
"step": 27000
},
{
"epoch": 5.023322569070685,
"grad_norm": 10.074256896972656,
"learning_rate": 4.1343566231436264e-07,
"loss": 0.3184,
"step": 28000
},
{
"epoch": 5.023322569070685,
"eval_accuracy": 0.8171359370269452,
"eval_f1": 0.8204518430439952,
"eval_loss": 0.3432736396789551,
"eval_precision": 0.8146399055489965,
"eval_recall": 0.8263473053892215,
"eval_runtime": 103.2718,
"eval_samples_per_second": 95.951,
"eval_steps_per_second": 6.004,
"step": 28000
},
{
"epoch": 5.202726946537496,
"grad_norm": 30.08102035522461,
"learning_rate": 3.8851789095983255e-07,
"loss": 0.2981,
"step": 29000
},
{
"epoch": 5.202726946537496,
"eval_accuracy": 0.8162276718135029,
"eval_f1": 0.8155202107182656,
"eval_loss": 0.34637027978897095,
"eval_precision": 0.8280189261468833,
"eval_recall": 0.8033932135728543,
"eval_runtime": 102.9045,
"eval_samples_per_second": 96.293,
"eval_steps_per_second": 6.025,
"step": 29000
},
{
"epoch": 5.382131324004305,
"grad_norm": 12.194862365722656,
"learning_rate": 3.6360011960530246e-07,
"loss": 0.301,
"step": 30000
},
{
"epoch": 5.382131324004305,
"eval_accuracy": 0.817640528812191,
"eval_f1": 0.8211776348342404,
"eval_loss": 0.342290461063385,
"eval_precision": 0.8143277723258097,
"eval_recall": 0.8281437125748503,
"eval_runtime": 103.5562,
"eval_samples_per_second": 95.687,
"eval_steps_per_second": 5.987,
"step": 30000
},
{
"epoch": 5.561535701471116,
"grad_norm": 154.16159057617188,
"learning_rate": 3.386823482507724e-07,
"loss": 0.2979,
"step": 31000
},
{
"epoch": 5.561535701471116,
"eval_accuracy": 0.8201634877384196,
"eval_f1": 0.8214070956103428,
"eval_loss": 0.32883062958717346,
"eval_precision": 0.8248792270531401,
"eval_recall": 0.8179640718562874,
"eval_runtime": 107.2514,
"eval_samples_per_second": 92.39,
"eval_steps_per_second": 5.781,
"step": 31000
},
{
"epoch": 5.740940078937927,
"grad_norm": 20.60382080078125,
"learning_rate": 3.137645768962424e-07,
"loss": 0.2941,
"step": 32000
},
{
"epoch": 5.740940078937927,
"eval_accuracy": 0.8215763447371077,
"eval_f1": 0.8254689042448173,
"eval_loss": 0.341677188873291,
"eval_precision": 0.8166015625,
"eval_recall": 0.8345309381237525,
"eval_runtime": 103.1122,
"eval_samples_per_second": 96.099,
"eval_steps_per_second": 6.013,
"step": 32000
},
{
"epoch": 5.920344456404736,
"grad_norm": 27.749670028686523,
"learning_rate": 2.888468055417123e-07,
"loss": 0.3015,
"step": 33000
},
{
"epoch": 5.920344456404736,
"eval_accuracy": 0.8243011403774346,
"eval_f1": 0.8335404914427765,
"eval_loss": 0.33678942918777466,
"eval_precision": 0.799963296017618,
"eval_recall": 0.870059880239521,
"eval_runtime": 103.0115,
"eval_samples_per_second": 96.193,
"eval_steps_per_second": 6.019,
"step": 33000
},
{
"epoch": 6.099748833871547,
"grad_norm": 63.67295455932617,
"learning_rate": 2.6392903418718226e-07,
"loss": 0.2953,
"step": 34000
},
{
"epoch": 6.099748833871547,
"eval_accuracy": 0.8256130790190735,
"eval_f1": 0.8240684178375076,
"eval_loss": 0.33581623435020447,
"eval_precision": 0.8410224438902744,
"eval_recall": 0.8077844311377246,
"eval_runtime": 103.1426,
"eval_samples_per_second": 96.071,
"eval_steps_per_second": 6.011,
"step": 34000
},
{
"epoch": 6.279153211338357,
"grad_norm": 26.843647003173828,
"learning_rate": 2.390112628326522e-07,
"loss": 0.2852,
"step": 35000
},
{
"epoch": 6.279153211338357,
"eval_accuracy": 0.8249066505197296,
"eval_f1": 0.8327065856715842,
"eval_loss": 0.34431934356689453,
"eval_precision": 0.8054467450102593,
"eval_recall": 0.86187624750499,
"eval_runtime": 103.3497,
"eval_samples_per_second": 95.878,
"eval_steps_per_second": 5.999,
"step": 35000
},
{
"epoch": 6.458557588805167,
"grad_norm": 183.19422912597656,
"learning_rate": 2.140934914781222e-07,
"loss": 0.2917,
"step": 36000
},
{
"epoch": 6.458557588805167,
"eval_accuracy": 0.824502977091533,
"eval_f1": 0.8318669631634922,
"eval_loss": 0.34868115186691284,
"eval_precision": 0.8066754172135758,
"eval_recall": 0.858682634730539,
"eval_runtime": 103.3302,
"eval_samples_per_second": 95.897,
"eval_steps_per_second": 6.0,
"step": 36000
},
{
"epoch": 6.637961966271977,
"grad_norm": 10.319212913513184,
"learning_rate": 1.8917572012359216e-07,
"loss": 0.2844,
"step": 37000
},
{
"epoch": 6.637961966271977,
"eval_accuracy": 0.8261176708043193,
"eval_f1": 0.8294565970503811,
"eval_loss": 0.32437703013420105,
"eval_precision": 0.8226978205379933,
"eval_recall": 0.8363273453093812,
"eval_runtime": 104.1332,
"eval_samples_per_second": 95.157,
"eval_steps_per_second": 5.954,
"step": 37000
},
{
"epoch": 6.8173663437387875,
"grad_norm": 58.22975540161133,
"learning_rate": 1.642579487690621e-07,
"loss": 0.2837,
"step": 38000
},
{
"epoch": 6.8173663437387875,
"eval_accuracy": 0.8285397113734988,
"eval_f1": 0.8333823673629499,
"eval_loss": 0.3295113742351532,
"eval_precision": 0.8191632928475033,
"eval_recall": 0.8481037924151696,
"eval_runtime": 103.1218,
"eval_samples_per_second": 96.09,
"eval_steps_per_second": 6.012,
"step": 38000
},
{
"epoch": 6.996770721205597,
"grad_norm": 8.232932090759277,
"learning_rate": 1.3934017741453206e-07,
"loss": 0.283,
"step": 39000
},
{
"epoch": 6.996770721205597,
"eval_accuracy": 0.8263195075184177,
"eval_f1": 0.8296882731321128,
"eval_loss": 0.3371128439903259,
"eval_precision": 0.8227674190382728,
"eval_recall": 0.8367265469061876,
"eval_runtime": 103.2754,
"eval_samples_per_second": 95.947,
"eval_steps_per_second": 6.003,
"step": 39000
},
{
"epoch": 7.176175098672408,
"grad_norm": 18.62181282043457,
"learning_rate": 1.14422406060002e-07,
"loss": 0.2711,
"step": 40000
},
{
"epoch": 7.176175098672408,
"eval_accuracy": 0.8297507316580887,
"eval_f1": 0.8376479645847368,
"eval_loss": 0.32895320653915405,
"eval_precision": 0.8087716037911169,
"eval_recall": 0.8686626746506986,
"eval_runtime": 103.3849,
"eval_samples_per_second": 95.846,
"eval_steps_per_second": 5.997,
"step": 40000
},
{
"epoch": 7.3555794761392175,
"grad_norm": 15.900300025939941,
"learning_rate": 8.950463470547195e-08,
"loss": 0.273,
"step": 41000
},
{
"epoch": 7.3555794761392175,
"eval_accuracy": 0.8292461398728429,
"eval_f1": 0.8348946135831382,
"eval_loss": 0.34222128987312317,
"eval_precision": 0.8167239404352806,
"eval_recall": 0.8538922155688623,
"eval_runtime": 103.3124,
"eval_samples_per_second": 95.913,
"eval_steps_per_second": 6.001,
"step": 41000
},
{
"epoch": 7.534983853606028,
"grad_norm": 54.62172317504883,
"learning_rate": 6.45868633509419e-08,
"loss": 0.2795,
"step": 42000
},
{
"epoch": 7.534983853606028,
"eval_accuracy": 0.8275305278030074,
"eval_f1": 0.8291512546236129,
"eval_loss": 0.33169299364089966,
"eval_precision": 0.8305627879030643,
"eval_recall": 0.8277445109780439,
"eval_runtime": 103.4355,
"eval_samples_per_second": 95.799,
"eval_steps_per_second": 5.994,
"step": 42000
},
{
"epoch": 7.714388231072839,
"grad_norm": 47.589847564697266,
"learning_rate": 3.9669091996411835e-08,
"loss": 0.2739,
"step": 43000
},
{
"epoch": 7.714388231072839,
"eval_accuracy": 0.8305580785144818,
"eval_f1": 0.8372904351196822,
"eval_loss": 0.336332768201828,
"eval_precision": 0.8137125635712941,
"eval_recall": 0.8622754491017964,
"eval_runtime": 103.0445,
"eval_samples_per_second": 96.162,
"eval_steps_per_second": 6.017,
"step": 43000
},
{
"epoch": 7.893792608539648,
"grad_norm": 32.284854888916016,
"learning_rate": 1.475132064188179e-08,
"loss": 0.2771,
"step": 44000
},
{
"epoch": 7.893792608539648,
"eval_accuracy": 0.8294479765869411,
"eval_f1": 0.8322747121873759,
"eval_loss": 0.33281558752059937,
"eval_precision": 0.8276746940386893,
"eval_recall": 0.8369261477045908,
"eval_runtime": 103.2292,
"eval_samples_per_second": 95.99,
"eval_steps_per_second": 6.006,
"step": 44000
}
],
"logging_steps": 1000,
"max_steps": 44592,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.949009928618441e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}