|
{ |
|
"best_metric": 0.1206900030374527, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-giant-2024_08_28-batch-size32_epochs150_freeze/checkpoint-25935", |
|
"epoch": 105.0, |
|
"eval_steps": 500, |
|
"global_step": 28665, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.21205821205821207, |
|
"eval_f1_macro": 0.5175126673232894, |
|
"eval_f1_micro": 0.7424333879451582, |
|
"eval_loss": 0.17437300086021423, |
|
"eval_roc_auc": 0.8285535192873753, |
|
"eval_runtime": 747.1492, |
|
"eval_samples_per_second": 3.863, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 0.29891085624694824, |
|
"learning_rate": 0.001, |
|
"loss": 0.2593, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.24774774774774774, |
|
"eval_f1_macro": 0.5912510936495889, |
|
"eval_f1_micro": 0.7776526996039191, |
|
"eval_loss": 0.1514047533273697, |
|
"eval_roc_auc": 0.856455760350861, |
|
"eval_runtime": 745.2688, |
|
"eval_samples_per_second": 3.872, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.23873873873873874, |
|
"eval_f1_macro": 0.6203462640123141, |
|
"eval_f1_micro": 0.7752795082305376, |
|
"eval_loss": 0.1557399332523346, |
|
"eval_roc_auc": 0.8580342914691714, |
|
"eval_runtime": 748.2805, |
|
"eval_samples_per_second": 3.857, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 0.24181818962097168, |
|
"learning_rate": 0.001, |
|
"loss": 0.1694, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.2494802494802495, |
|
"eval_f1_macro": 0.6112936548561337, |
|
"eval_f1_micro": 0.7691087713115115, |
|
"eval_loss": 0.1499096304178238, |
|
"eval_roc_auc": 0.8372664798756062, |
|
"eval_runtime": 747.4138, |
|
"eval_samples_per_second": 3.861, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.24497574497574498, |
|
"eval_f1_macro": 0.6316545255681125, |
|
"eval_f1_micro": 0.7744962975718961, |
|
"eval_loss": 0.15773828327655792, |
|
"eval_roc_auc": 0.8461026726645842, |
|
"eval_runtime": 747.0386, |
|
"eval_samples_per_second": 3.863, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.17729038000106812, |
|
"learning_rate": 0.001, |
|
"loss": 0.1637, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.25744975744975745, |
|
"eval_f1_macro": 0.6220908262048482, |
|
"eval_f1_micro": 0.7803354441211706, |
|
"eval_loss": 0.1529887616634369, |
|
"eval_roc_auc": 0.8508892919574323, |
|
"eval_runtime": 747.6468, |
|
"eval_samples_per_second": 3.86, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.2616077616077616, |
|
"eval_f1_macro": 0.6318272608971183, |
|
"eval_f1_micro": 0.7837652308220353, |
|
"eval_loss": 0.14232446253299713, |
|
"eval_roc_auc": 0.8519980061789139, |
|
"eval_runtime": 743.8547, |
|
"eval_samples_per_second": 3.88, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 0.21456240117549896, |
|
"learning_rate": 0.001, |
|
"loss": 0.1598, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.2591822591822592, |
|
"eval_f1_macro": 0.6268140575796306, |
|
"eval_f1_micro": 0.7824785045129828, |
|
"eval_loss": 0.14342056214809418, |
|
"eval_roc_auc": 0.8521029956678926, |
|
"eval_runtime": 745.5826, |
|
"eval_samples_per_second": 3.871, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.25848925848925847, |
|
"eval_f1_macro": 0.6406683603322132, |
|
"eval_f1_micro": 0.7840562521179261, |
|
"eval_loss": 0.14322087168693542, |
|
"eval_roc_auc": 0.8556312702614824, |
|
"eval_runtime": 746.6555, |
|
"eval_samples_per_second": 3.865, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 9.157509157509157, |
|
"grad_norm": 0.17193137109279633, |
|
"learning_rate": 0.001, |
|
"loss": 0.157, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.2591822591822592, |
|
"eval_f1_macro": 0.6350156993693012, |
|
"eval_f1_micro": 0.7779440239394473, |
|
"eval_loss": 0.15065954625606537, |
|
"eval_roc_auc": 0.8421810798397646, |
|
"eval_runtime": 749.2424, |
|
"eval_samples_per_second": 3.852, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 0.17156100273132324, |
|
"learning_rate": 0.001, |
|
"loss": 0.1564, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.26853776853776856, |
|
"eval_f1_macro": 0.6442254017268965, |
|
"eval_f1_micro": 0.7905542412977358, |
|
"eval_loss": 0.14012028276920319, |
|
"eval_roc_auc": 0.8599228950325096, |
|
"eval_runtime": 743.9581, |
|
"eval_samples_per_second": 3.879, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.26056826056826055, |
|
"eval_f1_macro": 0.6412994039301575, |
|
"eval_f1_micro": 0.7896027049873203, |
|
"eval_loss": 0.14037516713142395, |
|
"eval_roc_auc": 0.8592624552114599, |
|
"eval_runtime": 747.0487, |
|
"eval_samples_per_second": 3.863, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.14995847642421722, |
|
"learning_rate": 0.001, |
|
"loss": 0.1556, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2695772695772696, |
|
"eval_f1_macro": 0.6359393136512833, |
|
"eval_f1_micro": 0.7822141560798549, |
|
"eval_loss": 0.1420680731534958, |
|
"eval_roc_auc": 0.8492469381754499, |
|
"eval_runtime": 742.4635, |
|
"eval_samples_per_second": 3.887, |
|
"eval_steps_per_second": 0.123, |
|
"learning_rate": 0.001, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.2636867636867637, |
|
"eval_f1_macro": 0.6459907944955716, |
|
"eval_f1_micro": 0.7887275978034142, |
|
"eval_loss": 0.13944004476070404, |
|
"eval_roc_auc": 0.8568078446879906, |
|
"eval_runtime": 744.9297, |
|
"eval_samples_per_second": 3.874, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 14.652014652014651, |
|
"grad_norm": 0.1688154637813568, |
|
"learning_rate": 0.001, |
|
"loss": 0.1547, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.2553707553707554, |
|
"eval_f1_macro": 0.6554204386045119, |
|
"eval_f1_micro": 0.7915315007683115, |
|
"eval_loss": 0.13796783983707428, |
|
"eval_roc_auc": 0.8575869560318454, |
|
"eval_runtime": 749.7594, |
|
"eval_samples_per_second": 3.849, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.255024255024255, |
|
"eval_f1_macro": 0.6452554527968026, |
|
"eval_f1_micro": 0.7857792404624779, |
|
"eval_loss": 0.1441228836774826, |
|
"eval_roc_auc": 0.8505811645074093, |
|
"eval_runtime": 751.9487, |
|
"eval_samples_per_second": 3.838, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 16.483516483516482, |
|
"grad_norm": 0.15101341903209686, |
|
"learning_rate": 0.001, |
|
"loss": 0.1539, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.26784476784476785, |
|
"eval_f1_macro": 0.6485416937632181, |
|
"eval_f1_micro": 0.7904489177124567, |
|
"eval_loss": 0.14113685488700867, |
|
"eval_roc_auc": 0.8607338640531657, |
|
"eval_runtime": 751.954, |
|
"eval_samples_per_second": 3.838, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.26056826056826055, |
|
"eval_f1_macro": 0.654854199500387, |
|
"eval_f1_micro": 0.7940517933336151, |
|
"eval_loss": 0.1381485015153885, |
|
"eval_roc_auc": 0.8618218271900107, |
|
"eval_runtime": 756.1006, |
|
"eval_samples_per_second": 3.817, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.001, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 18.315018315018314, |
|
"grad_norm": 0.17647762596607208, |
|
"learning_rate": 0.001, |
|
"loss": 0.1552, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.2654192654192654, |
|
"eval_f1_macro": 0.6522812524843972, |
|
"eval_f1_micro": 0.793669650812508, |
|
"eval_loss": 0.13720253109931946, |
|
"eval_roc_auc": 0.8604083523719281, |
|
"eval_runtime": 753.1197, |
|
"eval_samples_per_second": 3.832, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.253984753984754, |
|
"eval_f1_macro": 0.6515497507659908, |
|
"eval_f1_micro": 0.791502353390154, |
|
"eval_loss": 0.13964051008224487, |
|
"eval_roc_auc": 0.8593941380801585, |
|
"eval_runtime": 760.0428, |
|
"eval_samples_per_second": 3.797, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.146520146520146, |
|
"grad_norm": 0.15846939384937286, |
|
"learning_rate": 0.001, |
|
"loss": 0.1531, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.2577962577962578, |
|
"eval_f1_macro": 0.6542904488686327, |
|
"eval_f1_micro": 0.7925025501530093, |
|
"eval_loss": 0.13785456120967865, |
|
"eval_roc_auc": 0.8592903826569759, |
|
"eval_runtime": 757.5213, |
|
"eval_samples_per_second": 3.81, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.001, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 21.978021978021978, |
|
"grad_norm": 0.16983690857887268, |
|
"learning_rate": 0.001, |
|
"loss": 0.1536, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.2661122661122661, |
|
"eval_f1_macro": 0.6524154901292529, |
|
"eval_f1_micro": 0.7952276188864443, |
|
"eval_loss": 0.13633865118026733, |
|
"eval_roc_auc": 0.8620495257431491, |
|
"eval_runtime": 758.4735, |
|
"eval_samples_per_second": 3.805, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.001, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.27096327096327094, |
|
"eval_f1_macro": 0.656651787807274, |
|
"eval_f1_micro": 0.7961679924728424, |
|
"eval_loss": 0.13627886772155762, |
|
"eval_roc_auc": 0.8595478597543244, |
|
"eval_runtime": 753.7633, |
|
"eval_samples_per_second": 3.829, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.1691550612449646, |
|
"learning_rate": 0.001, |
|
"loss": 0.1535, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2661122661122661, |
|
"eval_f1_macro": 0.6438900918479138, |
|
"eval_f1_micro": 0.7871861324722778, |
|
"eval_loss": 0.14012865722179413, |
|
"eval_roc_auc": 0.8565085837324373, |
|
"eval_runtime": 758.2383, |
|
"eval_samples_per_second": 3.806, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.001, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.27546777546777546, |
|
"eval_f1_macro": 0.6538094573584412, |
|
"eval_f1_micro": 0.7960565795113589, |
|
"eval_loss": 0.1359640210866928, |
|
"eval_roc_auc": 0.8588707063899927, |
|
"eval_runtime": 765.0178, |
|
"eval_samples_per_second": 3.772, |
|
"eval_steps_per_second": 0.119, |
|
"learning_rate": 0.001, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 25.641025641025642, |
|
"grad_norm": 0.14603881537914276, |
|
"learning_rate": 0.001, |
|
"loss": 0.153, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_f1_macro": 0.6407905722004358, |
|
"eval_f1_micro": 0.7942222975262623, |
|
"eval_loss": 0.1370791494846344, |
|
"eval_roc_auc": 0.8611700794845683, |
|
"eval_runtime": 750.2435, |
|
"eval_samples_per_second": 3.847, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.2654192654192654, |
|
"eval_f1_macro": 0.6469565906332285, |
|
"eval_f1_micro": 0.7902460077686664, |
|
"eval_loss": 0.13669614493846893, |
|
"eval_roc_auc": 0.8538650806596136, |
|
"eval_runtime": 744.2164, |
|
"eval_samples_per_second": 3.878, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 27.47252747252747, |
|
"grad_norm": 0.1542704999446869, |
|
"learning_rate": 0.001, |
|
"loss": 0.1532, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.26888426888426886, |
|
"eval_f1_macro": 0.642689033704319, |
|
"eval_f1_micro": 0.7912144926283021, |
|
"eval_loss": 0.1371130496263504, |
|
"eval_roc_auc": 0.8539010295328042, |
|
"eval_runtime": 744.0106, |
|
"eval_samples_per_second": 3.879, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_f1_macro": 0.6484600603294314, |
|
"eval_f1_micro": 0.7944120277694962, |
|
"eval_loss": 0.13781629502773285, |
|
"eval_roc_auc": 0.8597476308619466, |
|
"eval_runtime": 751.4281, |
|
"eval_samples_per_second": 3.841, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 7917 |
|
}, |
|
{ |
|
"epoch": 29.304029304029303, |
|
"grad_norm": 0.15774671733379364, |
|
"learning_rate": 0.001, |
|
"loss": 0.1539, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.26507276507276506, |
|
"eval_f1_macro": 0.6472439075890195, |
|
"eval_f1_micro": 0.7938241064573914, |
|
"eval_loss": 0.13641151785850525, |
|
"eval_roc_auc": 0.8590391831986771, |
|
"eval_runtime": 743.8204, |
|
"eval_samples_per_second": 3.88, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.2747747747747748, |
|
"eval_f1_macro": 0.6533472550118105, |
|
"eval_f1_micro": 0.7999161777032691, |
|
"eval_loss": 0.13565559685230255, |
|
"eval_roc_auc": 0.8672849828142924, |
|
"eval_runtime": 745.046, |
|
"eval_samples_per_second": 3.874, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 31.135531135531135, |
|
"grad_norm": 0.15824691951274872, |
|
"learning_rate": 0.001, |
|
"loss": 0.1527, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.2664587664587665, |
|
"eval_f1_macro": 0.662032330499469, |
|
"eval_f1_micro": 0.7928646379853095, |
|
"eval_loss": 0.137930765748024, |
|
"eval_roc_auc": 0.8629893205019107, |
|
"eval_runtime": 747.6199, |
|
"eval_samples_per_second": 3.86, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 32.967032967032964, |
|
"grad_norm": 0.17653779685497284, |
|
"learning_rate": 0.001, |
|
"loss": 0.1524, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.273042273042273, |
|
"eval_f1_macro": 0.6722007856831675, |
|
"eval_f1_micro": 0.7989514185446704, |
|
"eval_loss": 0.13557712733745575, |
|
"eval_roc_auc": 0.8642597778252326, |
|
"eval_runtime": 743.3529, |
|
"eval_samples_per_second": 3.882, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.273042273042273, |
|
"eval_f1_macro": 0.670590685863264, |
|
"eval_f1_micro": 0.7966670917825107, |
|
"eval_loss": 0.1347290426492691, |
|
"eval_roc_auc": 0.8614922779674185, |
|
"eval_runtime": 743.2445, |
|
"eval_samples_per_second": 3.883, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 9282 |
|
}, |
|
{ |
|
"epoch": 34.798534798534796, |
|
"grad_norm": 0.15610426664352417, |
|
"learning_rate": 0.001, |
|
"loss": 0.1516, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.2772002772002772, |
|
"eval_f1_macro": 0.6482708127714739, |
|
"eval_f1_micro": 0.7946646145953571, |
|
"eval_loss": 0.13544337451457977, |
|
"eval_roc_auc": 0.8588431142884431, |
|
"eval_runtime": 750.5786, |
|
"eval_samples_per_second": 3.845, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.25848925848925847, |
|
"eval_f1_macro": 0.6552995006011981, |
|
"eval_f1_micro": 0.7927604900328681, |
|
"eval_loss": 0.13763058185577393, |
|
"eval_roc_auc": 0.8582396561141522, |
|
"eval_runtime": 746.9319, |
|
"eval_samples_per_second": 3.864, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 36.63003663003663, |
|
"grad_norm": 0.176735520362854, |
|
"learning_rate": 0.001, |
|
"loss": 0.1527, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.2747747747747748, |
|
"eval_f1_macro": 0.6680976075122991, |
|
"eval_f1_micro": 0.7992204380799051, |
|
"eval_loss": 0.13456694781780243, |
|
"eval_roc_auc": 0.8638335422302681, |
|
"eval_runtime": 744.024, |
|
"eval_samples_per_second": 3.879, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 10101 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.27165627165627165, |
|
"eval_f1_macro": 0.6543467314054483, |
|
"eval_f1_micro": 0.7889066758966815, |
|
"eval_loss": 0.13784632086753845, |
|
"eval_roc_auc": 0.8524819477636044, |
|
"eval_runtime": 745.3518, |
|
"eval_samples_per_second": 3.872, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 0.16059936583042145, |
|
"learning_rate": 0.001, |
|
"loss": 0.1503, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.2664587664587665, |
|
"eval_f1_macro": 0.6627442989440849, |
|
"eval_f1_micro": 0.7965357098029371, |
|
"eval_loss": 0.13671767711639404, |
|
"eval_roc_auc": 0.865910488378856, |
|
"eval_runtime": 745.9061, |
|
"eval_samples_per_second": 3.869, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.27373527373527373, |
|
"eval_f1_macro": 0.670153584497431, |
|
"eval_f1_micro": 0.8004978220286246, |
|
"eval_loss": 0.13730555772781372, |
|
"eval_roc_auc": 0.8705375510125241, |
|
"eval_runtime": 744.6796, |
|
"eval_samples_per_second": 3.875, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 40.29304029304029, |
|
"grad_norm": 0.16920654475688934, |
|
"learning_rate": 0.001, |
|
"loss": 0.152, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.26576576576576577, |
|
"eval_f1_macro": 0.6610276871242879, |
|
"eval_f1_micro": 0.7942296990711015, |
|
"eval_loss": 0.13770104944705963, |
|
"eval_roc_auc": 0.8582536198369102, |
|
"eval_runtime": 744.9969, |
|
"eval_samples_per_second": 3.874, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.001, |
|
"step": 11193 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.28101178101178104, |
|
"eval_f1_macro": 0.6705886094654014, |
|
"eval_f1_micro": 0.8001525876319246, |
|
"eval_loss": 0.13536451756954193, |
|
"eval_roc_auc": 0.8642216961644161, |
|
"eval_runtime": 751.3727, |
|
"eval_samples_per_second": 3.841, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 42.124542124542124, |
|
"grad_norm": 0.1676277071237564, |
|
"learning_rate": 0.001, |
|
"loss": 0.1515, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.26507276507276506, |
|
"eval_f1_macro": 0.6619628883017729, |
|
"eval_f1_micro": 0.8000498525196295, |
|
"eval_loss": 0.13665379583835602, |
|
"eval_roc_auc": 0.8698817657657271, |
|
"eval_runtime": 749.8198, |
|
"eval_samples_per_second": 3.849, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.001, |
|
"step": 11739 |
|
}, |
|
{ |
|
"epoch": 43.956043956043956, |
|
"grad_norm": 0.15791508555412292, |
|
"learning_rate": 0.0001, |
|
"loss": 0.147, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.2869022869022869, |
|
"eval_f1_macro": 0.6825865030851337, |
|
"eval_f1_micro": 0.808658516161447, |
|
"eval_loss": 0.12908011674880981, |
|
"eval_roc_auc": 0.8723907154255005, |
|
"eval_runtime": 750.2309, |
|
"eval_samples_per_second": 3.847, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 12012 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.29972279972279975, |
|
"eval_f1_macro": 0.6938587241702103, |
|
"eval_f1_micro": 0.811512367788968, |
|
"eval_loss": 0.12761357426643372, |
|
"eval_roc_auc": 0.8720936945676423, |
|
"eval_runtime": 758.8984, |
|
"eval_samples_per_second": 3.803, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.0001, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 45.78754578754579, |
|
"grad_norm": 0.16074201464653015, |
|
"learning_rate": 0.0001, |
|
"loss": 0.139, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.2959112959112959, |
|
"eval_f1_macro": 0.6856377454961721, |
|
"eval_f1_micro": 0.8103163511624953, |
|
"eval_loss": 0.12698666751384735, |
|
"eval_roc_auc": 0.8699996458767716, |
|
"eval_runtime": 752.5715, |
|
"eval_samples_per_second": 3.835, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.2972972972972973, |
|
"eval_f1_macro": 0.6942647446672258, |
|
"eval_f1_micro": 0.8124920976103174, |
|
"eval_loss": 0.12690682709217072, |
|
"eval_roc_auc": 0.8725812846946867, |
|
"eval_runtime": 759.108, |
|
"eval_samples_per_second": 3.802, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.0001, |
|
"step": 12831 |
|
}, |
|
{ |
|
"epoch": 47.61904761904762, |
|
"grad_norm": 0.17895784974098206, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1375, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.694151320978192, |
|
"eval_f1_micro": 0.8131711409395973, |
|
"eval_loss": 0.12617328763008118, |
|
"eval_roc_auc": 0.8743386078020858, |
|
"eval_runtime": 767.7622, |
|
"eval_samples_per_second": 3.759, |
|
"eval_steps_per_second": 0.119, |
|
"learning_rate": 0.0001, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.2966042966042966, |
|
"eval_f1_macro": 0.6956458198072734, |
|
"eval_f1_micro": 0.8147346514047868, |
|
"eval_loss": 0.1263018250465393, |
|
"eval_roc_auc": 0.8774737921983433, |
|
"eval_runtime": 752.7691, |
|
"eval_samples_per_second": 3.834, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 13377 |
|
}, |
|
{ |
|
"epoch": 49.45054945054945, |
|
"grad_norm": 0.22477330267429352, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1353, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.2927927927927928, |
|
"eval_f1_macro": 0.7006577033751422, |
|
"eval_f1_micro": 0.8153475224476222, |
|
"eval_loss": 0.1258096992969513, |
|
"eval_roc_auc": 0.8781952512075065, |
|
"eval_runtime": 751.7275, |
|
"eval_samples_per_second": 3.839, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.2972972972972973, |
|
"eval_f1_macro": 0.6994505755010588, |
|
"eval_f1_micro": 0.8151571934207786, |
|
"eval_loss": 0.12573884427547455, |
|
"eval_roc_auc": 0.8775850056713371, |
|
"eval_runtime": 754.5773, |
|
"eval_samples_per_second": 3.825, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 13923 |
|
}, |
|
{ |
|
"epoch": 51.282051282051285, |
|
"grad_norm": 0.1825592815876007, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1337, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.2972972972972973, |
|
"eval_f1_macro": 0.6974514657531053, |
|
"eval_f1_micro": 0.8134649455833967, |
|
"eval_loss": 0.12501972913742065, |
|
"eval_roc_auc": 0.8728563740571469, |
|
"eval_runtime": 748.8299, |
|
"eval_samples_per_second": 3.854, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 14196 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.2948717948717949, |
|
"eval_f1_macro": 0.6962280886309719, |
|
"eval_f1_micro": 0.8132960287301124, |
|
"eval_loss": 0.12481856346130371, |
|
"eval_roc_auc": 0.8757195542554345, |
|
"eval_runtime": 754.8846, |
|
"eval_samples_per_second": 3.823, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 14469 |
|
}, |
|
{ |
|
"epoch": 53.11355311355312, |
|
"grad_norm": 0.16182786226272583, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1338, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_f1_macro": 0.6980743235485474, |
|
"eval_f1_micro": 0.8143470573377115, |
|
"eval_loss": 0.12473563104867935, |
|
"eval_roc_auc": 0.8739288040614714, |
|
"eval_runtime": 764.2531, |
|
"eval_samples_per_second": 3.776, |
|
"eval_steps_per_second": 0.119, |
|
"learning_rate": 0.0001, |
|
"step": 14742 |
|
}, |
|
{ |
|
"epoch": 54.94505494505494, |
|
"grad_norm": 0.22775864601135254, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1322, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.30076230076230076, |
|
"eval_f1_macro": 0.7020497284253308, |
|
"eval_f1_micro": 0.8165587111775452, |
|
"eval_loss": 0.12453257292509079, |
|
"eval_roc_auc": 0.8792131676966645, |
|
"eval_runtime": 758.6078, |
|
"eval_samples_per_second": 3.804, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.0001, |
|
"step": 15015 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.3011088011088011, |
|
"eval_f1_macro": 0.7041152638460181, |
|
"eval_f1_micro": 0.8185497191939213, |
|
"eval_loss": 0.12440259009599686, |
|
"eval_roc_auc": 0.8819546448626913, |
|
"eval_runtime": 755.48, |
|
"eval_samples_per_second": 3.82, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 0.0001, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 56.776556776556774, |
|
"grad_norm": 0.26265445351600647, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1313, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.3004158004158004, |
|
"eval_f1_macro": 0.6984123654445143, |
|
"eval_f1_micro": 0.8162207357859533, |
|
"eval_loss": 0.12393573671579361, |
|
"eval_roc_auc": 0.8770029692696153, |
|
"eval_runtime": 749.3127, |
|
"eval_samples_per_second": 3.852, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 15561 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.30006930006930005, |
|
"eval_f1_macro": 0.7041206694443728, |
|
"eval_f1_micro": 0.8171478565179352, |
|
"eval_loss": 0.12355069816112518, |
|
"eval_roc_auc": 0.8785400518736873, |
|
"eval_runtime": 751.5939, |
|
"eval_samples_per_second": 3.84, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 15834 |
|
}, |
|
{ |
|
"epoch": 58.608058608058606, |
|
"grad_norm": 0.19159354269504547, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1309, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.701908769020469, |
|
"eval_f1_micro": 0.8158932617269447, |
|
"eval_loss": 0.1237163171172142, |
|
"eval_roc_auc": 0.8757623441455382, |
|
"eval_runtime": 749.4527, |
|
"eval_samples_per_second": 3.851, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 16107 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.7008492179245241, |
|
"eval_f1_micro": 0.8152564590468943, |
|
"eval_loss": 0.12339853495359421, |
|
"eval_roc_auc": 0.8731348839280636, |
|
"eval_runtime": 748.7843, |
|
"eval_samples_per_second": 3.854, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 60.43956043956044, |
|
"grad_norm": 0.19487616419792175, |
|
"learning_rate": 0.0001, |
|
"loss": 0.13, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.3024948024948025, |
|
"eval_f1_macro": 0.7083200505706103, |
|
"eval_f1_micro": 0.8188720173535793, |
|
"eval_loss": 0.12294851988554001, |
|
"eval_roc_auc": 0.8791109816832443, |
|
"eval_runtime": 752.7718, |
|
"eval_samples_per_second": 3.834, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 16653 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.30284130284130284, |
|
"eval_f1_macro": 0.7054890147149661, |
|
"eval_f1_micro": 0.8166017506386899, |
|
"eval_loss": 0.12270853668451309, |
|
"eval_roc_auc": 0.876682675540494, |
|
"eval_runtime": 746.0294, |
|
"eval_samples_per_second": 3.868, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 62.27106227106227, |
|
"grad_norm": 0.20640559494495392, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1288, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.3038808038808039, |
|
"eval_f1_macro": 0.7105833307429198, |
|
"eval_f1_micro": 0.8176490288010717, |
|
"eval_loss": 0.12301415950059891, |
|
"eval_roc_auc": 0.8773957777780161, |
|
"eval_runtime": 748.1364, |
|
"eval_samples_per_second": 3.858, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 17199 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7085844813380441, |
|
"eval_f1_micro": 0.8191759178412541, |
|
"eval_loss": 0.12328237295150757, |
|
"eval_roc_auc": 0.880258676287372, |
|
"eval_runtime": 749.8061, |
|
"eval_samples_per_second": 3.849, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 0.0001, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 64.1025641025641, |
|
"grad_norm": 0.2363331913948059, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1291, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7103887558295827, |
|
"eval_f1_micro": 0.8187567612548888, |
|
"eval_loss": 0.12309526652097702, |
|
"eval_roc_auc": 0.8798153918051592, |
|
"eval_runtime": 745.0937, |
|
"eval_samples_per_second": 3.873, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 17745 |
|
}, |
|
{ |
|
"epoch": 65.93406593406593, |
|
"grad_norm": 0.26966458559036255, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1283, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.30284130284130284, |
|
"eval_f1_macro": 0.7061406642055487, |
|
"eval_f1_micro": 0.8186407442947141, |
|
"eval_loss": 0.12194398790597916, |
|
"eval_roc_auc": 0.8789458717279818, |
|
"eval_runtime": 744.2128, |
|
"eval_samples_per_second": 3.878, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 18018 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.3042273042273042, |
|
"eval_f1_macro": 0.7154558287425048, |
|
"eval_f1_micro": 0.8196775527077305, |
|
"eval_loss": 0.12292120605707169, |
|
"eval_roc_auc": 0.8822622625898855, |
|
"eval_runtime": 743.6955, |
|
"eval_samples_per_second": 3.881, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 18291 |
|
}, |
|
{ |
|
"epoch": 67.76556776556777, |
|
"grad_norm": 0.2636018991470337, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1273, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.30803880803880807, |
|
"eval_f1_macro": 0.7153434473934246, |
|
"eval_f1_micro": 0.8209686046990085, |
|
"eval_loss": 0.12254418432712555, |
|
"eval_roc_auc": 0.8843888396454903, |
|
"eval_runtime": 743.6093, |
|
"eval_samples_per_second": 3.881, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 18564 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.3031878031878032, |
|
"eval_f1_macro": 0.7101570111652898, |
|
"eval_f1_micro": 0.8195983668027664, |
|
"eval_loss": 0.12215162813663483, |
|
"eval_roc_auc": 0.87988510310888, |
|
"eval_runtime": 744.98, |
|
"eval_samples_per_second": 3.874, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 18837 |
|
}, |
|
{ |
|
"epoch": 69.59706959706959, |
|
"grad_norm": 0.19965404272079468, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1265, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.30838530838530837, |
|
"eval_f1_macro": 0.7109091736321397, |
|
"eval_f1_micro": 0.8184682603033231, |
|
"eval_loss": 0.12227334082126617, |
|
"eval_roc_auc": 0.8767948413903521, |
|
"eval_runtime": 744.4872, |
|
"eval_samples_per_second": 3.876, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.7120407268503043, |
|
"eval_f1_micro": 0.8170385739086251, |
|
"eval_loss": 0.12237659096717834, |
|
"eval_roc_auc": 0.8737123194105673, |
|
"eval_runtime": 747.0787, |
|
"eval_samples_per_second": 3.863, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 19383 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 0.2734057903289795, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1264, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.3063063063063063, |
|
"eval_f1_macro": 0.7203981522602361, |
|
"eval_f1_micro": 0.8203632727878687, |
|
"eval_loss": 0.1220996230840683, |
|
"eval_roc_auc": 0.8803336591982435, |
|
"eval_runtime": 742.9487, |
|
"eval_samples_per_second": 3.885, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 0.0001, |
|
"step": 19656 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.3087318087318087, |
|
"eval_f1_macro": 0.7144193511981376, |
|
"eval_f1_micro": 0.8198457369189076, |
|
"eval_loss": 0.12169401347637177, |
|
"eval_roc_auc": 0.8798110725748728, |
|
"eval_runtime": 752.9878, |
|
"eval_samples_per_second": 3.833, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 19929 |
|
}, |
|
{ |
|
"epoch": 73.26007326007326, |
|
"grad_norm": 0.20597431063652039, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1249, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.30665280665280664, |
|
"eval_f1_macro": 0.7124121424308173, |
|
"eval_f1_micro": 0.8190452070406484, |
|
"eval_loss": 0.12149834632873535, |
|
"eval_roc_auc": 0.8757233637628921, |
|
"eval_runtime": 756.5322, |
|
"eval_samples_per_second": 3.815, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 1e-05, |
|
"step": 20202 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.30561330561330563, |
|
"eval_f1_macro": 0.7145366354361308, |
|
"eval_f1_micro": 0.8208643316893754, |
|
"eval_loss": 0.12120900303125381, |
|
"eval_roc_auc": 0.879641026356426, |
|
"eval_runtime": 752.1644, |
|
"eval_samples_per_second": 3.837, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 75.0915750915751, |
|
"grad_norm": 0.25457698106765747, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1236, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.30803880803880807, |
|
"eval_f1_macro": 0.7191205487713891, |
|
"eval_f1_micro": 0.8218541121766927, |
|
"eval_loss": 0.1215985044836998, |
|
"eval_roc_auc": 0.8821938390069956, |
|
"eval_runtime": 752.3495, |
|
"eval_samples_per_second": 3.836, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 0.2589890658855438, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1233, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.31323631323631324, |
|
"eval_f1_macro": 0.7202749659896155, |
|
"eval_f1_micro": 0.8236983547367989, |
|
"eval_loss": 0.1214083805680275, |
|
"eval_roc_auc": 0.8867951606378082, |
|
"eval_runtime": 755.0282, |
|
"eval_samples_per_second": 3.822, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 21021 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.3097713097713098, |
|
"eval_f1_macro": 0.7168480610158249, |
|
"eval_f1_micro": 0.8222591362126246, |
|
"eval_loss": 0.12110316008329391, |
|
"eval_roc_auc": 0.8823316922046746, |
|
"eval_runtime": 752.7354, |
|
"eval_samples_per_second": 3.834, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 21294 |
|
}, |
|
{ |
|
"epoch": 78.75457875457876, |
|
"grad_norm": 0.26676803827285767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.123, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.30665280665280664, |
|
"eval_f1_macro": 0.7160500850094047, |
|
"eval_f1_micro": 0.8202977563430488, |
|
"eval_loss": 0.12149946391582489, |
|
"eval_roc_auc": 0.878321716124089, |
|
"eval_runtime": 752.3192, |
|
"eval_samples_per_second": 3.836, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 21567 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.30734580734580735, |
|
"eval_f1_macro": 0.7150848378423871, |
|
"eval_f1_micro": 0.8219257062844905, |
|
"eval_loss": 0.121590256690979, |
|
"eval_roc_auc": 0.8846639290079505, |
|
"eval_runtime": 747.5776, |
|
"eval_samples_per_second": 3.86, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 1e-05, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 80.58608058608058, |
|
"grad_norm": 0.2525629699230194, |
|
"learning_rate": 1e-05, |
|
"loss": 0.123, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.3115038115038115, |
|
"eval_f1_macro": 0.7187103786018064, |
|
"eval_f1_micro": 0.8216162121591194, |
|
"eval_loss": 0.12097962200641632, |
|
"eval_roc_auc": 0.8807537244642276, |
|
"eval_runtime": 755.3491, |
|
"eval_samples_per_second": 3.821, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 1e-05, |
|
"step": 22113 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.30942480942480943, |
|
"eval_f1_macro": 0.7156786549052798, |
|
"eval_f1_micro": 0.821175978238125, |
|
"eval_loss": 0.12082336097955704, |
|
"eval_roc_auc": 0.8794272915260457, |
|
"eval_runtime": 753.7414, |
|
"eval_samples_per_second": 3.829, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 22386 |
|
}, |
|
{ |
|
"epoch": 82.41758241758242, |
|
"grad_norm": 0.23939679563045502, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1214, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.30006930006930005, |
|
"eval_f1_macro": 0.7102312532643303, |
|
"eval_f1_micro": 0.8180206046275968, |
|
"eval_loss": 0.12147542089223862, |
|
"eval_roc_auc": 0.8750765523206339, |
|
"eval_runtime": 745.2706, |
|
"eval_samples_per_second": 3.872, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 1e-05, |
|
"step": 22659 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.31185031185031187, |
|
"eval_f1_macro": 0.7195842513107142, |
|
"eval_f1_micro": 0.8215978053038491, |
|
"eval_loss": 0.12100570648908615, |
|
"eval_roc_auc": 0.8816901523695672, |
|
"eval_runtime": 742.6349, |
|
"eval_samples_per_second": 3.886, |
|
"eval_steps_per_second": 0.123, |
|
"learning_rate": 1e-05, |
|
"step": 22932 |
|
}, |
|
{ |
|
"epoch": 84.24908424908425, |
|
"grad_norm": 0.30801209807395935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1234, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.31011781011781014, |
|
"eval_f1_macro": 0.7201395616901511, |
|
"eval_f1_micro": 0.8233587533156498, |
|
"eval_loss": 0.1208326444029808, |
|
"eval_roc_auc": 0.8835425924395763, |
|
"eval_runtime": 742.1618, |
|
"eval_samples_per_second": 3.889, |
|
"eval_steps_per_second": 0.123, |
|
"learning_rate": 1e-05, |
|
"step": 23205 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.30942480942480943, |
|
"eval_f1_macro": 0.7215167678270465, |
|
"eval_f1_micro": 0.8218151540383014, |
|
"eval_loss": 0.1210438683629036, |
|
"eval_roc_auc": 0.8813373302757117, |
|
"eval_runtime": 754.9986, |
|
"eval_samples_per_second": 3.823, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 23478 |
|
}, |
|
{ |
|
"epoch": 86.08058608058609, |
|
"grad_norm": 0.23295313119888306, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1216, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.3087318087318087, |
|
"eval_f1_macro": 0.7141558876633265, |
|
"eval_f1_micro": 0.8207271207689094, |
|
"eval_loss": 0.1212099939584732, |
|
"eval_roc_auc": 0.8796150036646389, |
|
"eval_runtime": 753.045, |
|
"eval_samples_per_second": 3.832, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1e-05, |
|
"step": 23751 |
|
}, |
|
{ |
|
"epoch": 87.91208791208791, |
|
"grad_norm": 0.21838252246379852, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1219, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.31011781011781014, |
|
"eval_f1_macro": 0.7124615854591595, |
|
"eval_f1_micro": 0.8223957468017943, |
|
"eval_loss": 0.12096676975488663, |
|
"eval_roc_auc": 0.8823577148964619, |
|
"eval_runtime": 758.2188, |
|
"eval_samples_per_second": 3.806, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 1e-05, |
|
"step": 24024 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.3121968121968122, |
|
"eval_f1_macro": 0.7249978662662346, |
|
"eval_f1_micro": 0.8240642149234173, |
|
"eval_loss": 0.12144902348518372, |
|
"eval_roc_auc": 0.8875640104562932, |
|
"eval_runtime": 760.5663, |
|
"eval_samples_per_second": 3.795, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 24297 |
|
}, |
|
{ |
|
"epoch": 89.74358974358974, |
|
"grad_norm": 0.21705362200737, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1219, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.31046431046431044, |
|
"eval_f1_macro": 0.7198781344667567, |
|
"eval_f1_micro": 0.8233893154847453, |
|
"eval_loss": 0.12115956842899323, |
|
"eval_roc_auc": 0.8863713931744356, |
|
"eval_runtime": 763.5088, |
|
"eval_samples_per_second": 3.78, |
|
"eval_steps_per_second": 0.119, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 24570 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.3097713097713098, |
|
"eval_f1_macro": 0.7159843095789674, |
|
"eval_f1_micro": 0.8212459126351974, |
|
"eval_loss": 0.1208055168390274, |
|
"eval_roc_auc": 0.8789555162204534, |
|
"eval_runtime": 757.8368, |
|
"eval_samples_per_second": 3.808, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 24843 |
|
}, |
|
{ |
|
"epoch": 91.57509157509158, |
|
"grad_norm": 0.23301896452903748, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1213, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.30734580734580735, |
|
"eval_f1_macro": 0.7144036362020703, |
|
"eval_f1_micro": 0.8223893065998329, |
|
"eval_loss": 0.12069901078939438, |
|
"eval_roc_auc": 0.8806577087797879, |
|
"eval_runtime": 763.0077, |
|
"eval_samples_per_second": 3.782, |
|
"eval_steps_per_second": 0.119, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 25116 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.30803880803880807, |
|
"eval_f1_macro": 0.7189178649032102, |
|
"eval_f1_micro": 0.8226574468966088, |
|
"eval_loss": 0.12093978375196457, |
|
"eval_roc_auc": 0.8834391187053254, |
|
"eval_runtime": 763.7654, |
|
"eval_samples_per_second": 3.779, |
|
"eval_steps_per_second": 0.119, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 25389 |
|
}, |
|
{ |
|
"epoch": 93.4065934065934, |
|
"grad_norm": 0.2630571126937866, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.122, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.3097713097713098, |
|
"eval_f1_macro": 0.7187657914933285, |
|
"eval_f1_micro": 0.8223438666334908, |
|
"eval_loss": 0.12092197686433792, |
|
"eval_roc_auc": 0.8828028504773688, |
|
"eval_runtime": 758.2573, |
|
"eval_samples_per_second": 3.806, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 25662 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.30942480942480943, |
|
"eval_f1_macro": 0.7127077698746517, |
|
"eval_f1_micro": 0.8221934621968021, |
|
"eval_loss": 0.1206900030374527, |
|
"eval_roc_auc": 0.8807116052620565, |
|
"eval_runtime": 755.4845, |
|
"eval_samples_per_second": 3.82, |
|
"eval_steps_per_second": 0.12, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 25935 |
|
}, |
|
{ |
|
"epoch": 95.23809523809524, |
|
"grad_norm": 0.32719686627388, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1209, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.30665280665280664, |
|
"eval_f1_macro": 0.7160309422692305, |
|
"eval_f1_micro": 0.8218438538205979, |
|
"eval_loss": 0.12142115086317062, |
|
"eval_roc_auc": 0.882100908487046, |
|
"eval_runtime": 764.2068, |
|
"eval_samples_per_second": 3.776, |
|
"eval_steps_per_second": 0.119, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 26208 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.30942480942480943, |
|
"eval_f1_macro": 0.71586766610014, |
|
"eval_f1_micro": 0.8208711661575798, |
|
"eval_loss": 0.12264719605445862, |
|
"eval_roc_auc": 0.879308955347207, |
|
"eval_runtime": 783.117, |
|
"eval_samples_per_second": 3.685, |
|
"eval_steps_per_second": 0.116, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 26481 |
|
}, |
|
{ |
|
"epoch": 97.06959706959707, |
|
"grad_norm": 0.27319103479385376, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.122, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.31185031185031187, |
|
"eval_f1_macro": 0.7190138873820752, |
|
"eval_f1_micro": 0.8224561403508771, |
|
"eval_loss": 0.12095578759908676, |
|
"eval_roc_auc": 0.8842500877259815, |
|
"eval_runtime": 761.8672, |
|
"eval_samples_per_second": 3.788, |
|
"eval_steps_per_second": 0.119, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 26754 |
|
}, |
|
{ |
|
"epoch": 98.9010989010989, |
|
"grad_norm": 0.314969539642334, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.1218, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.3097713097713098, |
|
"eval_f1_macro": 0.7177436878101541, |
|
"eval_f1_micro": 0.821403230518803, |
|
"eval_loss": 0.12075632065534592, |
|
"eval_roc_auc": 0.8803494740196957, |
|
"eval_runtime": 749.7836, |
|
"eval_samples_per_second": 3.849, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 27027 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.3108108108108108, |
|
"eval_f1_macro": 0.7191112023643382, |
|
"eval_f1_micro": 0.8218776194467728, |
|
"eval_loss": 0.12078335881233215, |
|
"eval_roc_auc": 0.8793780496180298, |
|
"eval_runtime": 751.4627, |
|
"eval_samples_per_second": 3.841, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 100.73260073260073, |
|
"grad_norm": 0.3180501163005829, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.1222, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.3097713097713098, |
|
"eval_f1_macro": 0.7199208624613478, |
|
"eval_f1_micro": 0.8230599775551769, |
|
"eval_loss": 0.12071150541305542, |
|
"eval_roc_auc": 0.8825144680800833, |
|
"eval_runtime": 753.7405, |
|
"eval_samples_per_second": 3.829, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.31011781011781014, |
|
"eval_f1_macro": 0.7181176324357539, |
|
"eval_f1_micro": 0.821560093739538, |
|
"eval_loss": 0.12102664262056351, |
|
"eval_roc_auc": 0.8796515695707274, |
|
"eval_runtime": 750.0067, |
|
"eval_samples_per_second": 3.848, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 27846 |
|
}, |
|
{ |
|
"epoch": 102.56410256410257, |
|
"grad_norm": 0.257368803024292, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.1212, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.31115731115731116, |
|
"eval_f1_macro": 0.7156251632807489, |
|
"eval_f1_micro": 0.8218559116391932, |
|
"eval_loss": 0.12072332948446274, |
|
"eval_roc_auc": 0.879889475994201, |
|
"eval_runtime": 747.7283, |
|
"eval_samples_per_second": 3.86, |
|
"eval_steps_per_second": 0.122, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 28119 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.3090783090783091, |
|
"eval_f1_macro": 0.7151217785983346, |
|
"eval_f1_micro": 0.8214226220223222, |
|
"eval_loss": 0.12122868001461029, |
|
"eval_roc_auc": 0.8810201217110805, |
|
"eval_runtime": 751.9776, |
|
"eval_samples_per_second": 3.838, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 28392 |
|
}, |
|
{ |
|
"epoch": 104.3956043956044, |
|
"grad_norm": 0.2758227586746216, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"loss": 0.1204, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.30838530838530837, |
|
"eval_f1_macro": 0.7175066761763569, |
|
"eval_f1_micro": 0.8216449497883642, |
|
"eval_loss": 0.12081456929445267, |
|
"eval_roc_auc": 0.882214590091632, |
|
"eval_runtime": 750.7114, |
|
"eval_samples_per_second": 3.844, |
|
"eval_steps_per_second": 0.121, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"step": 28665 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 1.0000000000000004e-08, |
|
"step": 28665, |
|
"total_flos": 5.049640374682393e+21, |
|
"train_loss": 0.023157235795491324, |
|
"train_runtime": 62002.1626, |
|
"train_samples_per_second": 21.086, |
|
"train_steps_per_second": 0.66 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.049640374682393e+21, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|