|
{ |
|
"best_metric": 0.6971784234046936, |
|
"best_model_checkpoint": "./cardiffnlp-twitter-xlmr-finetuned-txtnly-all-42/checkpoint-16500", |
|
"epoch": 3.787878787878788, |
|
"eval_steps": 500, |
|
"global_step": 31500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.298647880554199, |
|
"learning_rate": 4.994023569023569e-05, |
|
"loss": 0.6122, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_f1": 0.4840638597456899, |
|
"eval_loss": 0.854165256023407, |
|
"eval_precision": 0.6558887250350466, |
|
"eval_recall": 0.49045198529069495, |
|
"eval_runtime": 5.9285, |
|
"eval_samples_per_second": 148.099, |
|
"eval_steps_per_second": 9.277, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.411099433898926, |
|
"learning_rate": 4.988011063011063e-05, |
|
"loss": 0.5497, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_f1": 0.6209225023342669, |
|
"eval_loss": 0.8037390112876892, |
|
"eval_precision": 0.704421745545341, |
|
"eval_recall": 0.6070083321696225, |
|
"eval_runtime": 6.1691, |
|
"eval_samples_per_second": 142.322, |
|
"eval_steps_per_second": 8.915, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.836483001708984, |
|
"learning_rate": 4.9820105820105825e-05, |
|
"loss": 0.5404, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_f1": 0.3652071944289921, |
|
"eval_loss": 0.9700150489807129, |
|
"eval_precision": 0.5591482310679367, |
|
"eval_recall": 0.4176288227901131, |
|
"eval_runtime": 5.8886, |
|
"eval_samples_per_second": 149.101, |
|
"eval_steps_per_second": 9.34, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 13.717193603515625, |
|
"learning_rate": 4.975998075998076e-05, |
|
"loss": 0.5165, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_f1": 0.5369027892847279, |
|
"eval_loss": 0.744874894618988, |
|
"eval_precision": 0.7349445049700448, |
|
"eval_recall": 0.529664385793418, |
|
"eval_runtime": 5.996, |
|
"eval_samples_per_second": 146.43, |
|
"eval_steps_per_second": 9.173, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.4534995555877686, |
|
"learning_rate": 4.969997594997595e-05, |
|
"loss": 0.5136, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_f1": 0.5001381202499963, |
|
"eval_loss": 0.7884698510169983, |
|
"eval_precision": 0.6766332095394413, |
|
"eval_recall": 0.5025275799469348, |
|
"eval_runtime": 5.9085, |
|
"eval_samples_per_second": 148.6, |
|
"eval_steps_per_second": 9.309, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.195244550704956, |
|
"learning_rate": 4.963985088985089e-05, |
|
"loss": 0.5072, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_f1": 0.5917137619940201, |
|
"eval_loss": 0.8123684525489807, |
|
"eval_precision": 0.6076358199852175, |
|
"eval_recall": 0.6132374435600242, |
|
"eval_runtime": 6.1108, |
|
"eval_samples_per_second": 143.68, |
|
"eval_steps_per_second": 9.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 7.579603672027588, |
|
"learning_rate": 4.957972582972583e-05, |
|
"loss": 0.5011, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_f1": 0.578405909718061, |
|
"eval_loss": 0.8767459392547607, |
|
"eval_precision": 0.642659899090607, |
|
"eval_recall": 0.5987143322627193, |
|
"eval_runtime": 6.1563, |
|
"eval_samples_per_second": 142.618, |
|
"eval_steps_per_second": 8.934, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.266787052154541, |
|
"learning_rate": 4.951960076960077e-05, |
|
"loss": 0.5021, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_f1": 0.6502990015105321, |
|
"eval_loss": 0.7957776784896851, |
|
"eval_precision": 0.6847923256926328, |
|
"eval_recall": 0.636192338127822, |
|
"eval_runtime": 6.5221, |
|
"eval_samples_per_second": 134.618, |
|
"eval_steps_per_second": 8.433, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 6.044332027435303, |
|
"learning_rate": 4.945959595959596e-05, |
|
"loss": 0.4946, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_f1": 0.4982912515017284, |
|
"eval_loss": 0.8045271039009094, |
|
"eval_precision": 0.7220405815528763, |
|
"eval_recall": 0.4968300516687614, |
|
"eval_runtime": 6.1928, |
|
"eval_samples_per_second": 141.778, |
|
"eval_steps_per_second": 8.881, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.152063846588135, |
|
"learning_rate": 4.93994708994709e-05, |
|
"loss": 0.4928, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_f1": 0.550273048506264, |
|
"eval_loss": 0.780342698097229, |
|
"eval_precision": 0.7581894624319455, |
|
"eval_recall": 0.5380887213145278, |
|
"eval_runtime": 6.123, |
|
"eval_samples_per_second": 143.395, |
|
"eval_steps_per_second": 8.983, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 4.54200553894043, |
|
"learning_rate": 4.933934583934584e-05, |
|
"loss": 0.5008, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_f1": 0.4594232264185665, |
|
"eval_loss": 0.7507085204124451, |
|
"eval_precision": 0.44070483572560937, |
|
"eval_recall": 0.47984452823162504, |
|
"eval_runtime": 5.932, |
|
"eval_samples_per_second": 148.011, |
|
"eval_steps_per_second": 9.272, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 4.075632095336914, |
|
"learning_rate": 4.927922077922078e-05, |
|
"loss": 0.4966, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_f1": 0.6310991936984806, |
|
"eval_loss": 0.8238988518714905, |
|
"eval_precision": 0.6139657275796522, |
|
"eval_recall": 0.6767434715821813, |
|
"eval_runtime": 5.8918, |
|
"eval_samples_per_second": 149.02, |
|
"eval_steps_per_second": 9.335, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 4.8725104331970215, |
|
"learning_rate": 4.921909571909572e-05, |
|
"loss": 0.4791, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_f1": 0.5412559573187593, |
|
"eval_loss": 0.7028306722640991, |
|
"eval_precision": 0.6567775474615866, |
|
"eval_recall": 0.520631196760229, |
|
"eval_runtime": 6.3113, |
|
"eval_samples_per_second": 139.116, |
|
"eval_steps_per_second": 8.715, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.4915893077850342, |
|
"learning_rate": 4.915897065897066e-05, |
|
"loss": 0.494, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_f1": 0.5227267406470947, |
|
"eval_loss": 0.8033522367477417, |
|
"eval_precision": 0.6660302960734323, |
|
"eval_recall": 0.5188623562817111, |
|
"eval_runtime": 6.1252, |
|
"eval_samples_per_second": 143.342, |
|
"eval_steps_per_second": 8.979, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.151014804840088, |
|
"learning_rate": 4.90988455988456e-05, |
|
"loss": 0.4861, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_f1": 0.4541201667750796, |
|
"eval_loss": 0.900325357913971, |
|
"eval_precision": 0.5780562441152168, |
|
"eval_recall": 0.4784564539403249, |
|
"eval_runtime": 6.144, |
|
"eval_samples_per_second": 142.903, |
|
"eval_steps_per_second": 8.952, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 4.770496368408203, |
|
"learning_rate": 4.903872053872054e-05, |
|
"loss": 0.4804, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_f1": 0.5791890202588422, |
|
"eval_loss": 0.773960530757904, |
|
"eval_precision": 0.6238945275403609, |
|
"eval_recall": 0.5775003491132523, |
|
"eval_runtime": 6.556, |
|
"eval_samples_per_second": 133.923, |
|
"eval_steps_per_second": 8.389, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.520460367202759, |
|
"learning_rate": 4.897859547859548e-05, |
|
"loss": 0.4614, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_f1": 0.6470888284841774, |
|
"eval_loss": 0.7397181391716003, |
|
"eval_precision": 0.6848151355984641, |
|
"eval_recall": 0.6312358609132803, |
|
"eval_runtime": 6.1813, |
|
"eval_samples_per_second": 142.042, |
|
"eval_steps_per_second": 8.898, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 4.375688552856445, |
|
"learning_rate": 4.891847041847042e-05, |
|
"loss": 0.4315, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_f1": 0.614857769662433, |
|
"eval_loss": 0.788919985294342, |
|
"eval_precision": 0.6641593406916259, |
|
"eval_recall": 0.6034743750872783, |
|
"eval_runtime": 6.1798, |
|
"eval_samples_per_second": 142.076, |
|
"eval_steps_per_second": 8.9, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 4.091088771820068, |
|
"learning_rate": 4.885834535834536e-05, |
|
"loss": 0.4506, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_f1": 0.4967964786589283, |
|
"eval_loss": 0.8783875703811646, |
|
"eval_precision": 0.6387377173091459, |
|
"eval_recall": 0.5016645719871526, |
|
"eval_runtime": 5.9164, |
|
"eval_samples_per_second": 148.401, |
|
"eval_steps_per_second": 9.296, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 3.3903276920318604, |
|
"learning_rate": 4.87982202982203e-05, |
|
"loss": 0.4489, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_f1": 0.4949153076705755, |
|
"eval_loss": 0.7994188070297241, |
|
"eval_precision": 0.5340329579250159, |
|
"eval_recall": 0.49638597961178615, |
|
"eval_runtime": 5.9029, |
|
"eval_samples_per_second": 148.74, |
|
"eval_steps_per_second": 9.317, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 3.929879903793335, |
|
"learning_rate": 4.8738095238095235e-05, |
|
"loss": 0.4466, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_f1": 0.44642812881455524, |
|
"eval_loss": 0.8109920024871826, |
|
"eval_precision": 0.5776119229607602, |
|
"eval_recall": 0.47351207931853095, |
|
"eval_runtime": 5.9766, |
|
"eval_samples_per_second": 146.907, |
|
"eval_steps_per_second": 9.203, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 6.443171501159668, |
|
"learning_rate": 4.8677970177970176e-05, |
|
"loss": 0.4319, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_f1": 0.5481427288492505, |
|
"eval_loss": 0.8068605661392212, |
|
"eval_precision": 0.6612496177619213, |
|
"eval_recall": 0.5399497276916632, |
|
"eval_runtime": 5.9001, |
|
"eval_samples_per_second": 148.811, |
|
"eval_steps_per_second": 9.322, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 7.633645057678223, |
|
"learning_rate": 4.8617845117845116e-05, |
|
"loss": 0.4243, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_f1": 0.5797306372413114, |
|
"eval_loss": 0.7941620349884033, |
|
"eval_precision": 0.5948358635007136, |
|
"eval_recall": 0.5704752595075175, |
|
"eval_runtime": 6.145, |
|
"eval_samples_per_second": 142.881, |
|
"eval_steps_per_second": 8.95, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 3.275371789932251, |
|
"learning_rate": 4.8557720057720056e-05, |
|
"loss": 0.4398, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_f1": 0.5247242844808815, |
|
"eval_loss": 0.9738017916679382, |
|
"eval_precision": 0.5370369073777802, |
|
"eval_recall": 0.6070139179816599, |
|
"eval_runtime": 6.219, |
|
"eval_samples_per_second": 141.18, |
|
"eval_steps_per_second": 8.844, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.4162724018096924, |
|
"learning_rate": 4.8497594997595e-05, |
|
"loss": 0.4526, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_f1": 0.5589742980399895, |
|
"eval_loss": 0.7195601463317871, |
|
"eval_precision": 0.7046240283838195, |
|
"eval_recall": 0.5477959316668994, |
|
"eval_runtime": 6.3918, |
|
"eval_samples_per_second": 137.363, |
|
"eval_steps_per_second": 8.605, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 6.926381587982178, |
|
"learning_rate": 4.8437469937469944e-05, |
|
"loss": 0.4529, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_f1": 0.5863097712686139, |
|
"eval_loss": 0.8049713969230652, |
|
"eval_precision": 0.6419448505612538, |
|
"eval_recall": 0.5730605595121724, |
|
"eval_runtime": 6.3636, |
|
"eval_samples_per_second": 137.971, |
|
"eval_steps_per_second": 8.643, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.8420650959014893, |
|
"learning_rate": 4.837746512746513e-05, |
|
"loss": 0.446, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_f1": 0.6107236144330398, |
|
"eval_loss": 0.7564206719398499, |
|
"eval_precision": 0.6520992658162544, |
|
"eval_recall": 0.5912358609132803, |
|
"eval_runtime": 6.4128, |
|
"eval_samples_per_second": 136.914, |
|
"eval_steps_per_second": 8.577, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 2.423569679260254, |
|
"learning_rate": 4.831746031746032e-05, |
|
"loss": 0.4315, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_f1": 0.621245910301715, |
|
"eval_loss": 0.751511812210083, |
|
"eval_precision": 0.6474767054531395, |
|
"eval_recall": 0.6069198901456967, |
|
"eval_runtime": 5.9833, |
|
"eval_samples_per_second": 146.741, |
|
"eval_steps_per_second": 9.192, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 6.773381233215332, |
|
"learning_rate": 4.825733525733526e-05, |
|
"loss": 0.4464, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_f1": 0.559868694735591, |
|
"eval_loss": 0.8307517170906067, |
|
"eval_precision": 0.627583612882644, |
|
"eval_recall": 0.5512991667830377, |
|
"eval_runtime": 6.1679, |
|
"eval_samples_per_second": 142.35, |
|
"eval_steps_per_second": 8.917, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 6.220128059387207, |
|
"learning_rate": 4.8197330447330455e-05, |
|
"loss": 0.4423, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_f1": 0.5991996711711277, |
|
"eval_loss": 0.798150360584259, |
|
"eval_precision": 0.6176196711770697, |
|
"eval_recall": 0.5936535865568123, |
|
"eval_runtime": 6.0738, |
|
"eval_samples_per_second": 144.556, |
|
"eval_steps_per_second": 9.055, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.1065833568572998, |
|
"learning_rate": 4.8137205387205395e-05, |
|
"loss": 0.4551, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_f1": 0.6019748538222912, |
|
"eval_loss": 0.822293221950531, |
|
"eval_precision": 0.6355921902599784, |
|
"eval_recall": 0.5933528836754642, |
|
"eval_runtime": 6.1197, |
|
"eval_samples_per_second": 143.472, |
|
"eval_steps_per_second": 8.987, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 8.631648063659668, |
|
"learning_rate": 4.807708032708033e-05, |
|
"loss": 0.4408, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_f1": 0.5131249172090748, |
|
"eval_loss": 0.7691208124160767, |
|
"eval_precision": 0.608759764068229, |
|
"eval_recall": 0.5147484057161477, |
|
"eval_runtime": 6.3609, |
|
"eval_samples_per_second": 138.031, |
|
"eval_steps_per_second": 8.647, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 6.755849361419678, |
|
"learning_rate": 4.801695526695527e-05, |
|
"loss": 0.4389, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_f1": 0.6702519892656928, |
|
"eval_loss": 0.6971784234046936, |
|
"eval_precision": 0.6686766810877821, |
|
"eval_recall": 0.6729106735558349, |
|
"eval_runtime": 6.1341, |
|
"eval_samples_per_second": 143.134, |
|
"eval_steps_per_second": 8.966, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 19.813188552856445, |
|
"learning_rate": 4.795683020683021e-05, |
|
"loss": 0.3886, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_f1": 0.5543489692487942, |
|
"eval_loss": 0.7798230648040771, |
|
"eval_precision": 0.6125764375980934, |
|
"eval_recall": 0.543671740445934, |
|
"eval_runtime": 6.7491, |
|
"eval_samples_per_second": 130.09, |
|
"eval_steps_per_second": 8.149, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 7.927220821380615, |
|
"learning_rate": 4.789670514670515e-05, |
|
"loss": 0.3883, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_f1": 0.5978449313058904, |
|
"eval_loss": 0.8385018110275269, |
|
"eval_precision": 0.5948463716988197, |
|
"eval_recall": 0.6225499231950845, |
|
"eval_runtime": 6.122, |
|
"eval_samples_per_second": 143.416, |
|
"eval_steps_per_second": 8.984, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 6.237366199493408, |
|
"learning_rate": 4.783658008658009e-05, |
|
"loss": 0.4011, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_f1": 0.5914931472808443, |
|
"eval_loss": 0.7754688858985901, |
|
"eval_precision": 0.655128213311837, |
|
"eval_recall": 0.578716194200065, |
|
"eval_runtime": 6.558, |
|
"eval_samples_per_second": 133.882, |
|
"eval_steps_per_second": 8.387, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 3.3301048278808594, |
|
"learning_rate": 4.777645502645503e-05, |
|
"loss": 0.3992, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_f1": 0.5472455226037474, |
|
"eval_loss": 0.788632333278656, |
|
"eval_precision": 0.558195855728615, |
|
"eval_recall": 0.5519042964204254, |
|
"eval_runtime": 6.124, |
|
"eval_samples_per_second": 143.371, |
|
"eval_steps_per_second": 8.981, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 8.471348762512207, |
|
"learning_rate": 4.771645021645022e-05, |
|
"loss": 0.393, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_f1": 0.5889012942356766, |
|
"eval_loss": 0.7660124897956848, |
|
"eval_precision": 0.5901145289176211, |
|
"eval_recall": 0.592326956197924, |
|
"eval_runtime": 5.8572, |
|
"eval_samples_per_second": 149.9, |
|
"eval_steps_per_second": 9.39, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 15.840304374694824, |
|
"learning_rate": 4.765632515632516e-05, |
|
"loss": 0.3891, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_f1": 0.5354251462409856, |
|
"eval_loss": 0.7701670527458191, |
|
"eval_precision": 0.579215207029406, |
|
"eval_recall": 0.5330605595121725, |
|
"eval_runtime": 6.1187, |
|
"eval_samples_per_second": 143.495, |
|
"eval_steps_per_second": 8.989, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 1.6515294313430786, |
|
"learning_rate": 4.75962000962001e-05, |
|
"loss": 0.4119, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_f1": 0.5110658029804255, |
|
"eval_loss": 0.8545361161231995, |
|
"eval_precision": 0.5405823804957771, |
|
"eval_recall": 0.5243262114229856, |
|
"eval_runtime": 6.2418, |
|
"eval_samples_per_second": 140.665, |
|
"eval_steps_per_second": 8.812, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 3.166147470474243, |
|
"learning_rate": 4.753607503607504e-05, |
|
"loss": 0.3981, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_f1": 0.53639943040752, |
|
"eval_loss": 0.864085853099823, |
|
"eval_precision": 0.5695344700259635, |
|
"eval_recall": 0.5536247265279522, |
|
"eval_runtime": 5.9635, |
|
"eval_samples_per_second": 147.229, |
|
"eval_steps_per_second": 9.223, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 4.143538475036621, |
|
"learning_rate": 4.747594997594998e-05, |
|
"loss": 0.4, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_f1": 0.582186065915728, |
|
"eval_loss": 0.8044998049736023, |
|
"eval_precision": 0.5987904356270873, |
|
"eval_recall": 0.5844826141600334, |
|
"eval_runtime": 5.9156, |
|
"eval_samples_per_second": 148.422, |
|
"eval_steps_per_second": 9.298, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 5.849362850189209, |
|
"learning_rate": 4.741582491582492e-05, |
|
"loss": 0.4059, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_f1": 0.569600279809319, |
|
"eval_loss": 0.8023470044136047, |
|
"eval_precision": 0.6300909361955873, |
|
"eval_recall": 0.5548880510170833, |
|
"eval_runtime": 5.9073, |
|
"eval_samples_per_second": 148.629, |
|
"eval_steps_per_second": 9.31, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 2.0296847820281982, |
|
"learning_rate": 4.735582010582011e-05, |
|
"loss": 0.3805, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_f1": 0.5387095557628462, |
|
"eval_loss": 0.8242425322532654, |
|
"eval_precision": 0.5632921859195318, |
|
"eval_recall": 0.536337569240795, |
|
"eval_runtime": 6.1681, |
|
"eval_samples_per_second": 142.345, |
|
"eval_steps_per_second": 8.917, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 5.022754192352295, |
|
"learning_rate": 4.729569504569505e-05, |
|
"loss": 0.4126, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_f1": 0.525337187977395, |
|
"eval_loss": 0.8866151571273804, |
|
"eval_precision": 0.563019122327633, |
|
"eval_recall": 0.5244211702276219, |
|
"eval_runtime": 6.5791, |
|
"eval_samples_per_second": 133.453, |
|
"eval_steps_per_second": 8.36, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 6.320919990539551, |
|
"learning_rate": 4.7235690235690236e-05, |
|
"loss": 0.3959, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_f1": 0.5715827904573106, |
|
"eval_loss": 0.922848641872406, |
|
"eval_precision": 0.6485667793604627, |
|
"eval_recall": 0.5569566634082763, |
|
"eval_runtime": 6.5486, |
|
"eval_samples_per_second": 134.075, |
|
"eval_steps_per_second": 8.399, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 3.2674639225006104, |
|
"learning_rate": 4.717556517556518e-05, |
|
"loss": 0.3972, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_f1": 0.6330230633421515, |
|
"eval_loss": 0.8297170400619507, |
|
"eval_precision": 0.64149542011954, |
|
"eval_recall": 0.633559558720849, |
|
"eval_runtime": 6.1502, |
|
"eval_samples_per_second": 142.759, |
|
"eval_steps_per_second": 8.943, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 5.248292922973633, |
|
"learning_rate": 4.711544011544012e-05, |
|
"loss": 0.3779, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_f1": 0.5897470753706388, |
|
"eval_loss": 0.8682935833930969, |
|
"eval_precision": 0.6023327508623889, |
|
"eval_recall": 0.5919508448540706, |
|
"eval_runtime": 6.3839, |
|
"eval_samples_per_second": 137.534, |
|
"eval_steps_per_second": 8.615, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 4.1834635734558105, |
|
"learning_rate": 4.705531505531506e-05, |
|
"loss": 0.3951, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_f1": 0.5124969418380673, |
|
"eval_loss": 0.8628427982330322, |
|
"eval_precision": 0.5891878367677518, |
|
"eval_recall": 0.5116492110040497, |
|
"eval_runtime": 6.1272, |
|
"eval_samples_per_second": 143.295, |
|
"eval_steps_per_second": 8.976, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 12.86809253692627, |
|
"learning_rate": 4.699518999519e-05, |
|
"loss": 0.3916, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_f1": 0.5024144172335627, |
|
"eval_loss": 0.9203388094902039, |
|
"eval_precision": 0.6304846593419121, |
|
"eval_recall": 0.5026001955034213, |
|
"eval_runtime": 6.0613, |
|
"eval_samples_per_second": 144.854, |
|
"eval_steps_per_second": 9.074, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 3.2101404666900635, |
|
"learning_rate": 4.693506493506494e-05, |
|
"loss": 0.3524, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_f1": 0.5010573535401949, |
|
"eval_loss": 0.9825400710105896, |
|
"eval_precision": 0.6088672873311428, |
|
"eval_recall": 0.5039249639249639, |
|
"eval_runtime": 5.9279, |
|
"eval_samples_per_second": 148.113, |
|
"eval_steps_per_second": 9.278, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"grad_norm": 16.025983810424805, |
|
"learning_rate": 4.687493987493988e-05, |
|
"loss": 0.3332, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_f1": 0.5814110917677252, |
|
"eval_loss": 0.8755331635475159, |
|
"eval_precision": 0.5979503457905185, |
|
"eval_recall": 0.5711502117953731, |
|
"eval_runtime": 6.5321, |
|
"eval_samples_per_second": 134.413, |
|
"eval_steps_per_second": 8.42, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 12.575716972351074, |
|
"learning_rate": 4.681481481481482e-05, |
|
"loss": 0.3517, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_f1": 0.6181463909269773, |
|
"eval_loss": 0.9921577572822571, |
|
"eval_precision": 0.6701390442386371, |
|
"eval_recall": 0.5940511101801424, |
|
"eval_runtime": 6.2002, |
|
"eval_samples_per_second": 141.609, |
|
"eval_steps_per_second": 8.871, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 2.219468355178833, |
|
"learning_rate": 4.675468975468976e-05, |
|
"loss": 0.3534, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_f1": 0.5242620258087817, |
|
"eval_loss": 0.9572548866271973, |
|
"eval_precision": 0.5652503976549385, |
|
"eval_recall": 0.5174640413350091, |
|
"eval_runtime": 6.4041, |
|
"eval_samples_per_second": 137.101, |
|
"eval_steps_per_second": 8.588, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"grad_norm": 2.1716973781585693, |
|
"learning_rate": 4.6694684944684945e-05, |
|
"loss": 0.3544, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_f1": 0.5551290620723939, |
|
"eval_loss": 0.9826774001121521, |
|
"eval_precision": 0.5738657811880764, |
|
"eval_recall": 0.5531322440999861, |
|
"eval_runtime": 5.8897, |
|
"eval_samples_per_second": 149.075, |
|
"eval_steps_per_second": 9.338, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"grad_norm": 5.642761707305908, |
|
"learning_rate": 4.6634559884559885e-05, |
|
"loss": 0.3526, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_f1": 0.46574966897620484, |
|
"eval_loss": 0.9517427682876587, |
|
"eval_precision": 0.6019158514451703, |
|
"eval_recall": 0.4737364427687008, |
|
"eval_runtime": 6.2232, |
|
"eval_samples_per_second": 141.086, |
|
"eval_steps_per_second": 8.838, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 8.693815231323242, |
|
"learning_rate": 4.6574434824434825e-05, |
|
"loss": 0.3448, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_f1": 0.5231658522131929, |
|
"eval_loss": 0.955856204032898, |
|
"eval_precision": 0.5743577178625582, |
|
"eval_recall": 0.5138062654191686, |
|
"eval_runtime": 6.2254, |
|
"eval_samples_per_second": 141.036, |
|
"eval_steps_per_second": 8.835, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"grad_norm": 10.058433532714844, |
|
"learning_rate": 4.6514309764309766e-05, |
|
"loss": 0.3662, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_f1": 0.6173176500366803, |
|
"eval_loss": 0.8469758033752441, |
|
"eval_precision": 0.6416565078769693, |
|
"eval_recall": 0.6176418563515337, |
|
"eval_runtime": 6.1339, |
|
"eval_samples_per_second": 143.14, |
|
"eval_steps_per_second": 8.967, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"grad_norm": 9.207432746887207, |
|
"learning_rate": 4.645466570466571e-05, |
|
"loss": 0.3502, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_f1": 0.5911826792863208, |
|
"eval_loss": 0.8524171113967896, |
|
"eval_precision": 0.6606129937002267, |
|
"eval_recall": 0.577619513103384, |
|
"eval_runtime": 5.9367, |
|
"eval_samples_per_second": 147.893, |
|
"eval_steps_per_second": 9.264, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 2.538233757019043, |
|
"learning_rate": 4.639454064454065e-05, |
|
"loss": 0.3733, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_f1": 0.5466184654496565, |
|
"eval_loss": 0.9210164546966553, |
|
"eval_precision": 0.5577658998711631, |
|
"eval_recall": 0.5554857329050877, |
|
"eval_runtime": 6.4254, |
|
"eval_samples_per_second": 136.645, |
|
"eval_steps_per_second": 8.56, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"grad_norm": 2.017235279083252, |
|
"learning_rate": 4.633441558441559e-05, |
|
"loss": 0.3424, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_f1": 0.5809192439862544, |
|
"eval_loss": 0.9294881820678711, |
|
"eval_precision": 0.5863171312403235, |
|
"eval_recall": 0.6100302564818694, |
|
"eval_runtime": 6.2949, |
|
"eval_samples_per_second": 139.477, |
|
"eval_steps_per_second": 8.737, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"grad_norm": 7.538774490356445, |
|
"learning_rate": 4.627429052429053e-05, |
|
"loss": 0.3591, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_f1": 0.4588251776601326, |
|
"eval_loss": 0.970705509185791, |
|
"eval_precision": 0.5827537007312288, |
|
"eval_recall": 0.4768803239770982, |
|
"eval_runtime": 6.0168, |
|
"eval_samples_per_second": 145.925, |
|
"eval_steps_per_second": 9.141, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"grad_norm": 4.64936637878418, |
|
"learning_rate": 4.621416546416546e-05, |
|
"loss": 0.3634, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_f1": 0.575160103511553, |
|
"eval_loss": 0.8524229526519775, |
|
"eval_precision": 0.6136046998053873, |
|
"eval_recall": 0.5680603267700042, |
|
"eval_runtime": 6.5694, |
|
"eval_samples_per_second": 133.651, |
|
"eval_steps_per_second": 8.372, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"step": 31500, |
|
"total_flos": 1.3260126913238016e+17, |
|
"train_loss": 0.42908321610708083, |
|
"train_runtime": 7590.5884, |
|
"train_samples_per_second": 876.388, |
|
"train_steps_per_second": 54.778 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 415800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.3260126913238016e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|