|
{ |
|
"best_metric": 0.8376479645847368, |
|
"best_model_checkpoint": "./XLMR-large2-multi-109k-multi-outputs/checkpoint-40000", |
|
"epoch": 7.893792608539648, |
|
"eval_steps": 1000, |
|
"global_step": 44000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17940437746681018, |
|
"grad_norm": 6.077027320861816, |
|
"learning_rate": 2.242152466367713e-07, |
|
"loss": 0.7043, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17940437746681018, |
|
"eval_accuracy": 0.5152891310929458, |
|
"eval_f1": 0.4964881014781424, |
|
"eval_loss": 0.6920226216316223, |
|
"eval_precision": 0.522852726871274, |
|
"eval_recall": 0.4726546906187625, |
|
"eval_runtime": 103.5147, |
|
"eval_samples_per_second": 95.726, |
|
"eval_steps_per_second": 5.989, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35880875493362036, |
|
"grad_norm": 7.982357501983643, |
|
"learning_rate": 4.484304932735426e-07, |
|
"loss": 0.6972, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.35880875493362036, |
|
"eval_accuracy": 0.5537390251286709, |
|
"eval_f1": 0.514065934065934, |
|
"eval_loss": 0.6867982745170593, |
|
"eval_precision": 0.571882640586797, |
|
"eval_recall": 0.4668662674650699, |
|
"eval_runtime": 103.803, |
|
"eval_samples_per_second": 95.46, |
|
"eval_steps_per_second": 5.973, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5382131324004306, |
|
"grad_norm": 10.05902099609375, |
|
"learning_rate": 6.72645739910314e-07, |
|
"loss": 0.6892, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5382131324004306, |
|
"eval_accuracy": 0.5728125946109597, |
|
"eval_f1": 0.44280637093589575, |
|
"eval_loss": 0.6743206977844238, |
|
"eval_precision": 0.6501739466563587, |
|
"eval_recall": 0.33572854291417165, |
|
"eval_runtime": 103.5733, |
|
"eval_samples_per_second": 95.671, |
|
"eval_steps_per_second": 5.986, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7176175098672407, |
|
"grad_norm": 17.559974670410156, |
|
"learning_rate": 8.968609865470852e-07, |
|
"loss": 0.6686, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7176175098672407, |
|
"eval_accuracy": 0.620345140781108, |
|
"eval_f1": 0.6229705351773902, |
|
"eval_loss": 0.6282544136047363, |
|
"eval_precision": 0.6256038647342995, |
|
"eval_recall": 0.6203592814371257, |
|
"eval_runtime": 103.1262, |
|
"eval_samples_per_second": 96.086, |
|
"eval_steps_per_second": 6.012, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.897021887334051, |
|
"grad_norm": 17.958587646484375, |
|
"learning_rate": 9.865444034685537e-07, |
|
"loss": 0.6251, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.897021887334051, |
|
"eval_accuracy": 0.6844282975073166, |
|
"eval_f1": 0.671568112593215, |
|
"eval_loss": 0.5713071227073669, |
|
"eval_precision": 0.7087120372422966, |
|
"eval_recall": 0.63812375249501, |
|
"eval_runtime": 103.2452, |
|
"eval_samples_per_second": 95.975, |
|
"eval_steps_per_second": 6.005, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0764262648008611, |
|
"grad_norm": 21.860984802246094, |
|
"learning_rate": 9.616266321140236e-07, |
|
"loss": 0.559, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0764262648008611, |
|
"eval_accuracy": 0.7250983953981229, |
|
"eval_f1": 0.6946188340807175, |
|
"eval_loss": 0.48871228098869324, |
|
"eval_precision": 0.7923273657289003, |
|
"eval_recall": 0.6183632734530938, |
|
"eval_runtime": 102.8682, |
|
"eval_samples_per_second": 96.327, |
|
"eval_steps_per_second": 6.027, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.2558306422676713, |
|
"grad_norm": 144.90872192382812, |
|
"learning_rate": 9.367088607594936e-07, |
|
"loss": 0.4901, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.2558306422676713, |
|
"eval_accuracy": 0.7360984963164799, |
|
"eval_f1": 0.7147998691242229, |
|
"eval_loss": 0.4424116909503937, |
|
"eval_precision": 0.7879297908150997, |
|
"eval_recall": 0.6540918163672654, |
|
"eval_runtime": 103.0989, |
|
"eval_samples_per_second": 96.112, |
|
"eval_steps_per_second": 6.014, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.4352350197344816, |
|
"grad_norm": 23.610597610473633, |
|
"learning_rate": 9.117910894049637e-07, |
|
"loss": 0.4595, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.4352350197344816, |
|
"eval_accuracy": 0.7460894136643456, |
|
"eval_f1": 0.7595106098260371, |
|
"eval_loss": 0.4278419017791748, |
|
"eval_precision": 0.7287234042553191, |
|
"eval_recall": 0.7930139720558882, |
|
"eval_runtime": 103.2272, |
|
"eval_samples_per_second": 95.992, |
|
"eval_steps_per_second": 6.006, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.6146393972012918, |
|
"grad_norm": 13.438475608825684, |
|
"learning_rate": 8.868733180504335e-07, |
|
"loss": 0.4431, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.6146393972012918, |
|
"eval_accuracy": 0.7517408416590978, |
|
"eval_f1": 0.7542457542457542, |
|
"eval_loss": 0.4201831519603729, |
|
"eval_precision": 0.755, |
|
"eval_recall": 0.7534930139720559, |
|
"eval_runtime": 103.1397, |
|
"eval_samples_per_second": 96.074, |
|
"eval_steps_per_second": 6.011, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.794043774668102, |
|
"grad_norm": 59.678855895996094, |
|
"learning_rate": 8.619555466959035e-07, |
|
"loss": 0.4246, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.794043774668102, |
|
"eval_accuracy": 0.7612271672217177, |
|
"eval_f1": 0.795611610228058, |
|
"eval_loss": 0.4052415192127228, |
|
"eval_precision": 0.7013402375875724, |
|
"eval_recall": 0.9191616766467066, |
|
"eval_runtime": 103.3719, |
|
"eval_samples_per_second": 95.858, |
|
"eval_steps_per_second": 5.998, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.973448152134912, |
|
"grad_norm": 56.009273529052734, |
|
"learning_rate": 8.370377753413735e-07, |
|
"loss": 0.4168, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.973448152134912, |
|
"eval_accuracy": 0.7611262488646685, |
|
"eval_f1": 0.7569565663825855, |
|
"eval_loss": 0.39738962054252625, |
|
"eval_precision": 0.7794459716641996, |
|
"eval_recall": 0.7357285429141717, |
|
"eval_runtime": 103.343, |
|
"eval_samples_per_second": 95.885, |
|
"eval_steps_per_second": 5.999, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.1528525296017222, |
|
"grad_norm": 48.44904708862305, |
|
"learning_rate": 8.121200039868433e-07, |
|
"loss": 0.4074, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.1528525296017222, |
|
"eval_accuracy": 0.7603189020082753, |
|
"eval_f1": 0.7932445373030382, |
|
"eval_loss": 0.42876219749450684, |
|
"eval_precision": 0.7034120734908137, |
|
"eval_recall": 0.9093812375249501, |
|
"eval_runtime": 103.2666, |
|
"eval_samples_per_second": 95.955, |
|
"eval_steps_per_second": 6.004, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.3322569070685324, |
|
"grad_norm": 7.191207408905029, |
|
"learning_rate": 7.872022326323134e-07, |
|
"loss": 0.398, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.3322569070685324, |
|
"eval_accuracy": 0.7668785952164698, |
|
"eval_f1": 0.7844747154319835, |
|
"eval_loss": 0.39464080333709717, |
|
"eval_precision": 0.7365101611772951, |
|
"eval_recall": 0.8391217564870259, |
|
"eval_runtime": 103.6045, |
|
"eval_samples_per_second": 95.643, |
|
"eval_steps_per_second": 5.984, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.5116612845353425, |
|
"grad_norm": 8.779580116271973, |
|
"learning_rate": 7.622844612777832e-07, |
|
"loss": 0.4009, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.5116612845353425, |
|
"eval_accuracy": 0.7699061459279443, |
|
"eval_f1": 0.7972251867662753, |
|
"eval_loss": 0.38235536217689514, |
|
"eval_precision": 0.7189605389797883, |
|
"eval_recall": 0.8946107784431138, |
|
"eval_runtime": 103.5288, |
|
"eval_samples_per_second": 95.713, |
|
"eval_steps_per_second": 5.989, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.6910656620021527, |
|
"grad_norm": 10.75382137298584, |
|
"learning_rate": 7.373666899232532e-07, |
|
"loss": 0.383, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.6910656620021527, |
|
"eval_accuracy": 0.7800988999899081, |
|
"eval_f1": 0.7934401365058299, |
|
"eval_loss": 0.4023512005805969, |
|
"eval_precision": 0.7555515435999278, |
|
"eval_recall": 0.8353293413173652, |
|
"eval_runtime": 103.2649, |
|
"eval_samples_per_second": 95.957, |
|
"eval_steps_per_second": 6.004, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.8704700394689633, |
|
"grad_norm": 8.595725059509277, |
|
"learning_rate": 7.124489185687232e-07, |
|
"loss": 0.3869, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.8704700394689633, |
|
"eval_accuracy": 0.7843374709859724, |
|
"eval_f1": 0.7970753014908366, |
|
"eval_loss": 0.3746848404407501, |
|
"eval_precision": 0.7601883716717985, |
|
"eval_recall": 0.8377245508982036, |
|
"eval_runtime": 103.4123, |
|
"eval_samples_per_second": 95.82, |
|
"eval_steps_per_second": 5.995, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.0498744169357734, |
|
"grad_norm": 30.062721252441406, |
|
"learning_rate": 6.875311472141931e-07, |
|
"loss": 0.3761, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.0498744169357734, |
|
"eval_accuracy": 0.7884751236249874, |
|
"eval_f1": 0.7946708463949843, |
|
"eval_loss": 0.39211228489875793, |
|
"eval_precision": 0.7803001154290111, |
|
"eval_recall": 0.8095808383233533, |
|
"eval_runtime": 103.6712, |
|
"eval_samples_per_second": 95.581, |
|
"eval_steps_per_second": 5.98, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.2292787944025836, |
|
"grad_norm": 56.15926742553711, |
|
"learning_rate": 6.62613375859663e-07, |
|
"loss": 0.3609, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.2292787944025836, |
|
"eval_accuracy": 0.784741144414169, |
|
"eval_f1": 0.7985645481159694, |
|
"eval_loss": 0.39061158895492554, |
|
"eval_precision": 0.7578419071518193, |
|
"eval_recall": 0.8439121756487026, |
|
"eval_runtime": 103.0596, |
|
"eval_samples_per_second": 96.148, |
|
"eval_steps_per_second": 6.016, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.4086831718693937, |
|
"grad_norm": 7.428126811981201, |
|
"learning_rate": 6.376956045051331e-07, |
|
"loss": 0.3535, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.4086831718693937, |
|
"eval_accuracy": 0.7953375719043294, |
|
"eval_f1": 0.8157368707977467, |
|
"eval_loss": 0.3811704218387604, |
|
"eval_precision": 0.7486657771847899, |
|
"eval_recall": 0.8960079840319362, |
|
"eval_runtime": 103.3599, |
|
"eval_samples_per_second": 95.869, |
|
"eval_steps_per_second": 5.998, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.588087549336204, |
|
"grad_norm": 248.54281616210938, |
|
"learning_rate": 6.127778331506029e-07, |
|
"loss": 0.3497, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.588087549336204, |
|
"eval_accuracy": 0.7977596124735089, |
|
"eval_f1": 0.8121836925960637, |
|
"eval_loss": 0.37064263224601746, |
|
"eval_precision": 0.765547703180212, |
|
"eval_recall": 0.8648702594810379, |
|
"eval_runtime": 103.2293, |
|
"eval_samples_per_second": 95.99, |
|
"eval_steps_per_second": 6.006, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.767491926803014, |
|
"grad_norm": 44.91804504394531, |
|
"learning_rate": 5.87860061796073e-07, |
|
"loss": 0.3543, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.767491926803014, |
|
"eval_accuracy": 0.8025027752548188, |
|
"eval_f1": 0.8141676953755579, |
|
"eval_loss": 0.3442750871181488, |
|
"eval_precision": 0.7764897663466763, |
|
"eval_recall": 0.855688622754491, |
|
"eval_runtime": 103.1871, |
|
"eval_samples_per_second": 96.029, |
|
"eval_steps_per_second": 6.009, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.946896304269824, |
|
"grad_norm": 84.26334381103516, |
|
"learning_rate": 5.629422904415428e-07, |
|
"loss": 0.3425, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.946896304269824, |
|
"eval_accuracy": 0.8035119588253103, |
|
"eval_f1": 0.8079692277344905, |
|
"eval_loss": 0.3556448519229889, |
|
"eval_precision": 0.7985962175862741, |
|
"eval_recall": 0.817564870259481, |
|
"eval_runtime": 102.9714, |
|
"eval_samples_per_second": 96.231, |
|
"eval_steps_per_second": 6.021, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.126300681736635, |
|
"grad_norm": 23.619245529174805, |
|
"learning_rate": 5.380245190870128e-07, |
|
"loss": 0.335, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.126300681736635, |
|
"eval_accuracy": 0.8024018568977697, |
|
"eval_f1": 0.8052903739061257, |
|
"eval_loss": 0.3544567823410034, |
|
"eval_precision": 0.8024177566389219, |
|
"eval_recall": 0.808183632734531, |
|
"eval_runtime": 102.8733, |
|
"eval_samples_per_second": 96.322, |
|
"eval_steps_per_second": 6.027, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.3057050592034445, |
|
"grad_norm": 16.840389251708984, |
|
"learning_rate": 5.131067477324828e-07, |
|
"loss": 0.3222, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.3057050592034445, |
|
"eval_accuracy": 0.8070441013220305, |
|
"eval_f1": 0.8103550882761357, |
|
"eval_loss": 0.349142849445343, |
|
"eval_precision": 0.8054022082018928, |
|
"eval_recall": 0.8153692614770459, |
|
"eval_runtime": 103.0742, |
|
"eval_samples_per_second": 96.135, |
|
"eval_steps_per_second": 6.015, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.485109436670255, |
|
"grad_norm": 49.83803939819336, |
|
"learning_rate": 4.881889763779527e-07, |
|
"loss": 0.3157, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.485109436670255, |
|
"eval_accuracy": 0.8095670602482592, |
|
"eval_f1": 0.8254232583957813, |
|
"eval_loss": 0.357431560754776, |
|
"eval_precision": 0.7692705638903259, |
|
"eval_recall": 0.8904191616766467, |
|
"eval_runtime": 103.3608, |
|
"eval_samples_per_second": 95.868, |
|
"eval_steps_per_second": 5.998, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.664513814137065, |
|
"grad_norm": 134.8468475341797, |
|
"learning_rate": 4.632712050234227e-07, |
|
"loss": 0.3207, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.664513814137065, |
|
"eval_accuracy": 0.8153194066000605, |
|
"eval_f1": 0.8328156404165905, |
|
"eval_loss": 0.34428831934928894, |
|
"eval_precision": 0.7678571428571429, |
|
"eval_recall": 0.9097804391217564, |
|
"eval_runtime": 103.0601, |
|
"eval_samples_per_second": 96.148, |
|
"eval_steps_per_second": 6.016, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.843918191603875, |
|
"grad_norm": 12.487037658691406, |
|
"learning_rate": 4.3835343366889267e-07, |
|
"loss": 0.3217, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.843918191603875, |
|
"eval_accuracy": 0.8124936926026844, |
|
"eval_f1": 0.8151611619578193, |
|
"eval_loss": 0.3367626368999481, |
|
"eval_precision": 0.8125743752479175, |
|
"eval_recall": 0.8177644710578842, |
|
"eval_runtime": 103.075, |
|
"eval_samples_per_second": 96.134, |
|
"eval_steps_per_second": 6.015, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.023322569070685, |
|
"grad_norm": 10.074256896972656, |
|
"learning_rate": 4.1343566231436264e-07, |
|
"loss": 0.3184, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.023322569070685, |
|
"eval_accuracy": 0.8171359370269452, |
|
"eval_f1": 0.8204518430439952, |
|
"eval_loss": 0.3432736396789551, |
|
"eval_precision": 0.8146399055489965, |
|
"eval_recall": 0.8263473053892215, |
|
"eval_runtime": 103.2718, |
|
"eval_samples_per_second": 95.951, |
|
"eval_steps_per_second": 6.004, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.202726946537496, |
|
"grad_norm": 30.08102035522461, |
|
"learning_rate": 3.8851789095983255e-07, |
|
"loss": 0.2981, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.202726946537496, |
|
"eval_accuracy": 0.8162276718135029, |
|
"eval_f1": 0.8155202107182656, |
|
"eval_loss": 0.34637027978897095, |
|
"eval_precision": 0.8280189261468833, |
|
"eval_recall": 0.8033932135728543, |
|
"eval_runtime": 102.9045, |
|
"eval_samples_per_second": 96.293, |
|
"eval_steps_per_second": 6.025, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.382131324004305, |
|
"grad_norm": 12.194862365722656, |
|
"learning_rate": 3.6360011960530246e-07, |
|
"loss": 0.301, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.382131324004305, |
|
"eval_accuracy": 0.817640528812191, |
|
"eval_f1": 0.8211776348342404, |
|
"eval_loss": 0.342290461063385, |
|
"eval_precision": 0.8143277723258097, |
|
"eval_recall": 0.8281437125748503, |
|
"eval_runtime": 103.5562, |
|
"eval_samples_per_second": 95.687, |
|
"eval_steps_per_second": 5.987, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.561535701471116, |
|
"grad_norm": 154.16159057617188, |
|
"learning_rate": 3.386823482507724e-07, |
|
"loss": 0.2979, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 5.561535701471116, |
|
"eval_accuracy": 0.8201634877384196, |
|
"eval_f1": 0.8214070956103428, |
|
"eval_loss": 0.32883062958717346, |
|
"eval_precision": 0.8248792270531401, |
|
"eval_recall": 0.8179640718562874, |
|
"eval_runtime": 107.2514, |
|
"eval_samples_per_second": 92.39, |
|
"eval_steps_per_second": 5.781, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 5.740940078937927, |
|
"grad_norm": 20.60382080078125, |
|
"learning_rate": 3.137645768962424e-07, |
|
"loss": 0.2941, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.740940078937927, |
|
"eval_accuracy": 0.8215763447371077, |
|
"eval_f1": 0.8254689042448173, |
|
"eval_loss": 0.341677188873291, |
|
"eval_precision": 0.8166015625, |
|
"eval_recall": 0.8345309381237525, |
|
"eval_runtime": 103.1122, |
|
"eval_samples_per_second": 96.099, |
|
"eval_steps_per_second": 6.013, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.920344456404736, |
|
"grad_norm": 27.749670028686523, |
|
"learning_rate": 2.888468055417123e-07, |
|
"loss": 0.3015, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.920344456404736, |
|
"eval_accuracy": 0.8243011403774346, |
|
"eval_f1": 0.8335404914427765, |
|
"eval_loss": 0.33678942918777466, |
|
"eval_precision": 0.799963296017618, |
|
"eval_recall": 0.870059880239521, |
|
"eval_runtime": 103.0115, |
|
"eval_samples_per_second": 96.193, |
|
"eval_steps_per_second": 6.019, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 6.099748833871547, |
|
"grad_norm": 63.67295455932617, |
|
"learning_rate": 2.6392903418718226e-07, |
|
"loss": 0.2953, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.099748833871547, |
|
"eval_accuracy": 0.8256130790190735, |
|
"eval_f1": 0.8240684178375076, |
|
"eval_loss": 0.33581623435020447, |
|
"eval_precision": 0.8410224438902744, |
|
"eval_recall": 0.8077844311377246, |
|
"eval_runtime": 103.1426, |
|
"eval_samples_per_second": 96.071, |
|
"eval_steps_per_second": 6.011, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.279153211338357, |
|
"grad_norm": 26.843647003173828, |
|
"learning_rate": 2.390112628326522e-07, |
|
"loss": 0.2852, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 6.279153211338357, |
|
"eval_accuracy": 0.8249066505197296, |
|
"eval_f1": 0.8327065856715842, |
|
"eval_loss": 0.34431934356689453, |
|
"eval_precision": 0.8054467450102593, |
|
"eval_recall": 0.86187624750499, |
|
"eval_runtime": 103.3497, |
|
"eval_samples_per_second": 95.878, |
|
"eval_steps_per_second": 5.999, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 6.458557588805167, |
|
"grad_norm": 183.19422912597656, |
|
"learning_rate": 2.140934914781222e-07, |
|
"loss": 0.2917, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 6.458557588805167, |
|
"eval_accuracy": 0.824502977091533, |
|
"eval_f1": 0.8318669631634922, |
|
"eval_loss": 0.34868115186691284, |
|
"eval_precision": 0.8066754172135758, |
|
"eval_recall": 0.858682634730539, |
|
"eval_runtime": 103.3302, |
|
"eval_samples_per_second": 95.897, |
|
"eval_steps_per_second": 6.0, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 6.637961966271977, |
|
"grad_norm": 10.319212913513184, |
|
"learning_rate": 1.8917572012359216e-07, |
|
"loss": 0.2844, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 6.637961966271977, |
|
"eval_accuracy": 0.8261176708043193, |
|
"eval_f1": 0.8294565970503811, |
|
"eval_loss": 0.32437703013420105, |
|
"eval_precision": 0.8226978205379933, |
|
"eval_recall": 0.8363273453093812, |
|
"eval_runtime": 104.1332, |
|
"eval_samples_per_second": 95.157, |
|
"eval_steps_per_second": 5.954, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 6.8173663437387875, |
|
"grad_norm": 58.22975540161133, |
|
"learning_rate": 1.642579487690621e-07, |
|
"loss": 0.2837, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 6.8173663437387875, |
|
"eval_accuracy": 0.8285397113734988, |
|
"eval_f1": 0.8333823673629499, |
|
"eval_loss": 0.3295113742351532, |
|
"eval_precision": 0.8191632928475033, |
|
"eval_recall": 0.8481037924151696, |
|
"eval_runtime": 103.1218, |
|
"eval_samples_per_second": 96.09, |
|
"eval_steps_per_second": 6.012, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 6.996770721205597, |
|
"grad_norm": 8.232932090759277, |
|
"learning_rate": 1.3934017741453206e-07, |
|
"loss": 0.283, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.996770721205597, |
|
"eval_accuracy": 0.8263195075184177, |
|
"eval_f1": 0.8296882731321128, |
|
"eval_loss": 0.3371128439903259, |
|
"eval_precision": 0.8227674190382728, |
|
"eval_recall": 0.8367265469061876, |
|
"eval_runtime": 103.2754, |
|
"eval_samples_per_second": 95.947, |
|
"eval_steps_per_second": 6.003, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 7.176175098672408, |
|
"grad_norm": 18.62181282043457, |
|
"learning_rate": 1.14422406060002e-07, |
|
"loss": 0.2711, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 7.176175098672408, |
|
"eval_accuracy": 0.8297507316580887, |
|
"eval_f1": 0.8376479645847368, |
|
"eval_loss": 0.32895320653915405, |
|
"eval_precision": 0.8087716037911169, |
|
"eval_recall": 0.8686626746506986, |
|
"eval_runtime": 103.3849, |
|
"eval_samples_per_second": 95.846, |
|
"eval_steps_per_second": 5.997, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 7.3555794761392175, |
|
"grad_norm": 15.900300025939941, |
|
"learning_rate": 8.950463470547195e-08, |
|
"loss": 0.273, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 7.3555794761392175, |
|
"eval_accuracy": 0.8292461398728429, |
|
"eval_f1": 0.8348946135831382, |
|
"eval_loss": 0.34222128987312317, |
|
"eval_precision": 0.8167239404352806, |
|
"eval_recall": 0.8538922155688623, |
|
"eval_runtime": 103.3124, |
|
"eval_samples_per_second": 95.913, |
|
"eval_steps_per_second": 6.001, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 7.534983853606028, |
|
"grad_norm": 54.62172317504883, |
|
"learning_rate": 6.45868633509419e-08, |
|
"loss": 0.2795, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 7.534983853606028, |
|
"eval_accuracy": 0.8275305278030074, |
|
"eval_f1": 0.8291512546236129, |
|
"eval_loss": 0.33169299364089966, |
|
"eval_precision": 0.8305627879030643, |
|
"eval_recall": 0.8277445109780439, |
|
"eval_runtime": 103.4355, |
|
"eval_samples_per_second": 95.799, |
|
"eval_steps_per_second": 5.994, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 7.714388231072839, |
|
"grad_norm": 47.589847564697266, |
|
"learning_rate": 3.9669091996411835e-08, |
|
"loss": 0.2739, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 7.714388231072839, |
|
"eval_accuracy": 0.8305580785144818, |
|
"eval_f1": 0.8372904351196822, |
|
"eval_loss": 0.336332768201828, |
|
"eval_precision": 0.8137125635712941, |
|
"eval_recall": 0.8622754491017964, |
|
"eval_runtime": 103.0445, |
|
"eval_samples_per_second": 96.162, |
|
"eval_steps_per_second": 6.017, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 7.893792608539648, |
|
"grad_norm": 32.284854888916016, |
|
"learning_rate": 1.475132064188179e-08, |
|
"loss": 0.2771, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 7.893792608539648, |
|
"eval_accuracy": 0.8294479765869411, |
|
"eval_f1": 0.8322747121873759, |
|
"eval_loss": 0.33281558752059937, |
|
"eval_precision": 0.8276746940386893, |
|
"eval_recall": 0.8369261477045908, |
|
"eval_runtime": 103.2292, |
|
"eval_samples_per_second": 95.99, |
|
"eval_steps_per_second": 6.006, |
|
"step": 44000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 44592, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.949009928618441e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|