|
{ |
|
"best_metric": 0.11870068311691284, |
|
"best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/DinoVdeau-large-2024_04_03-with_data_aug_batch-size32_epochs150_freeze/checkpoint-31707", |
|
"epoch": 127.0, |
|
"eval_steps": 500, |
|
"global_step": 34417, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2206588881262869, |
|
"eval_f1_macro": 0.49067608571654225, |
|
"eval_f1_micro": 0.7368702869126769, |
|
"eval_loss": 0.1679287850856781, |
|
"eval_roc_auc": 0.8187531778602468, |
|
"eval_runtime": 714.8817, |
|
"eval_samples_per_second": 4.076, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 0.001, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.001, |
|
"loss": 0.2713, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.2515442690459849, |
|
"eval_f1_macro": 0.538945165530625, |
|
"eval_f1_micro": 0.761357152262652, |
|
"eval_loss": 0.15397264063358307, |
|
"eval_roc_auc": 0.8356163050775759, |
|
"eval_runtime": 720.5749, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.128, |
|
"learning_rate": 0.001, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.2525737817433082, |
|
"eval_f1_macro": 0.6053847883354035, |
|
"eval_f1_micro": 0.7728152001031681, |
|
"eval_loss": 0.14765480160713196, |
|
"eval_roc_auc": 0.8471568306864959, |
|
"eval_runtime": 730.8961, |
|
"eval_samples_per_second": 3.987, |
|
"eval_steps_per_second": 0.126, |
|
"learning_rate": 0.001, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.1679, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.2594371997254633, |
|
"eval_f1_macro": 0.5847686766758632, |
|
"eval_f1_micro": 0.7755244755244756, |
|
"eval_loss": 0.1577611267566681, |
|
"eval_roc_auc": 0.8442033715582856, |
|
"eval_runtime": 714.9053, |
|
"eval_samples_per_second": 4.076, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 0.001, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.2618393960192176, |
|
"eval_f1_macro": 0.6124919180480791, |
|
"eval_f1_micro": 0.7818521347933113, |
|
"eval_loss": 0.1426197737455368, |
|
"eval_roc_auc": 0.8555100487696016, |
|
"eval_runtime": 733.6285, |
|
"eval_samples_per_second": 3.972, |
|
"eval_steps_per_second": 0.125, |
|
"learning_rate": 0.001, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.1598, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.25497597803706246, |
|
"eval_f1_macro": 0.6238861296316591, |
|
"eval_f1_micro": 0.7822487732024749, |
|
"eval_loss": 0.1422213762998581, |
|
"eval_roc_auc": 0.854243572334213, |
|
"eval_runtime": 724.219, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 0.127, |
|
"learning_rate": 0.001, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.255662319835278, |
|
"eval_f1_macro": 0.6319974141584176, |
|
"eval_f1_micro": 0.7825118828416049, |
|
"eval_loss": 0.1426122486591339, |
|
"eval_roc_auc": 0.8534283984871422, |
|
"eval_runtime": 746.9062, |
|
"eval_samples_per_second": 3.901, |
|
"eval_steps_per_second": 0.123, |
|
"learning_rate": 0.001, |
|
"step": 1897 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.1571, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.26286890871654084, |
|
"eval_f1_macro": 0.622325808061766, |
|
"eval_f1_micro": 0.7755977927651747, |
|
"eval_loss": 0.1528300642967224, |
|
"eval_roc_auc": 0.8437122124534273, |
|
"eval_runtime": 750.7491, |
|
"eval_samples_per_second": 3.881, |
|
"eval_steps_per_second": 0.123, |
|
"learning_rate": 0.001, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.24811256005490734, |
|
"eval_f1_macro": 0.6413065292545327, |
|
"eval_f1_micro": 0.7796035913942063, |
|
"eval_loss": 0.1438213288784027, |
|
"eval_roc_auc": 0.8548646774451706, |
|
"eval_runtime": 702.3842, |
|
"eval_samples_per_second": 4.149, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.001, |
|
"step": 2439 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.1554, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.26973232669869596, |
|
"eval_f1_macro": 0.6289481397755302, |
|
"eval_f1_micro": 0.7888898226741743, |
|
"eval_loss": 0.14050152897834778, |
|
"eval_roc_auc": 0.8621391370400399, |
|
"eval_runtime": 707.8611, |
|
"eval_samples_per_second": 4.117, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.001, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.26835964310226496, |
|
"eval_f1_macro": 0.6222474437475864, |
|
"eval_f1_micro": 0.789769130122821, |
|
"eval_loss": 0.14092855155467987, |
|
"eval_roc_auc": 0.8613792306841266, |
|
"eval_runtime": 711.2194, |
|
"eval_samples_per_second": 4.097, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 0.001, |
|
"step": 2981 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.1536, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.272477693891558, |
|
"eval_f1_macro": 0.6165764997376745, |
|
"eval_f1_micro": 0.7862856154611094, |
|
"eval_loss": 0.1391688883304596, |
|
"eval_roc_auc": 0.8528316876814077, |
|
"eval_runtime": 716.1193, |
|
"eval_samples_per_second": 4.069, |
|
"eval_steps_per_second": 0.128, |
|
"learning_rate": 0.001, |
|
"step": 3252 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.1526, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2625257378174331, |
|
"eval_f1_macro": 0.6418972133310931, |
|
"eval_f1_micro": 0.7877202761222827, |
|
"eval_loss": 0.13992685079574585, |
|
"eval_roc_auc": 0.8558891516745222, |
|
"eval_runtime": 710.6314, |
|
"eval_samples_per_second": 4.101, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 0.001, |
|
"step": 3523 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.26492793411118737, |
|
"eval_f1_macro": 0.632569342193695, |
|
"eval_f1_micro": 0.7859690345319514, |
|
"eval_loss": 0.1437946856021881, |
|
"eval_roc_auc": 0.8609422021280538, |
|
"eval_runtime": 694.5265, |
|
"eval_samples_per_second": 4.196, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 3794 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"learning_rate": 0.001, |
|
"loss": 0.1535, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.2735072065888813, |
|
"eval_f1_macro": 0.6499317144862637, |
|
"eval_f1_micro": 0.7930450968779085, |
|
"eval_loss": 0.13769365847110748, |
|
"eval_roc_auc": 0.8625195818341326, |
|
"eval_runtime": 690.6675, |
|
"eval_samples_per_second": 4.219, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.2676733013040494, |
|
"eval_f1_macro": 0.6435362275021823, |
|
"eval_f1_micro": 0.7868312757201646, |
|
"eval_loss": 0.13966824114322662, |
|
"eval_roc_auc": 0.8526053782518377, |
|
"eval_runtime": 692.8928, |
|
"eval_samples_per_second": 4.206, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 4336 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.1517, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.26458476321207963, |
|
"eval_f1_macro": 0.6400976985447264, |
|
"eval_f1_micro": 0.7928011464216472, |
|
"eval_loss": 0.1382310837507248, |
|
"eval_roc_auc": 0.8634350422987906, |
|
"eval_runtime": 684.8353, |
|
"eval_samples_per_second": 4.255, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.001, |
|
"step": 4607 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.26835964310226496, |
|
"eval_f1_macro": 0.6285887470600311, |
|
"eval_f1_micro": 0.7912449392712552, |
|
"eval_loss": 0.1392030268907547, |
|
"eval_roc_auc": 0.8624142094617062, |
|
"eval_runtime": 684.0603, |
|
"eval_samples_per_second": 4.26, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.001, |
|
"step": 4878 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 0.001, |
|
"loss": 0.1524, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.26355525051475637, |
|
"eval_f1_macro": 0.6182634857374021, |
|
"eval_f1_micro": 0.7874116344434035, |
|
"eval_loss": 0.1391826868057251, |
|
"eval_roc_auc": 0.8575979217492725, |
|
"eval_runtime": 683.6418, |
|
"eval_samples_per_second": 4.262, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.001, |
|
"step": 5149 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.25978037062457104, |
|
"eval_f1_macro": 0.6286353323757679, |
|
"eval_f1_micro": 0.7878349022447502, |
|
"eval_loss": 0.13860712945461273, |
|
"eval_roc_auc": 0.857768912450985, |
|
"eval_runtime": 691.5194, |
|
"eval_samples_per_second": 4.214, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 20.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.1527, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.2601235415236788, |
|
"eval_f1_macro": 0.6408380159549439, |
|
"eval_f1_micro": 0.7879620486841541, |
|
"eval_loss": 0.13738416135311127, |
|
"eval_roc_auc": 0.8556654235498968, |
|
"eval_runtime": 685.1425, |
|
"eval_samples_per_second": 4.253, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.001, |
|
"step": 5691 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.2704186684969115, |
|
"eval_f1_macro": 0.6476322873247978, |
|
"eval_f1_micro": 0.7897032412554519, |
|
"eval_loss": 0.13765838742256165, |
|
"eval_roc_auc": 0.8577058498082936, |
|
"eval_runtime": 697.7818, |
|
"eval_samples_per_second": 4.176, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 5962 |
|
}, |
|
{ |
|
"epoch": 22.14, |
|
"learning_rate": 0.001, |
|
"loss": 0.1513, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.26973232669869596, |
|
"eval_f1_macro": 0.6442899469566483, |
|
"eval_f1_micro": 0.7955080753701211, |
|
"eval_loss": 0.13733763992786407, |
|
"eval_roc_auc": 0.865520436446917, |
|
"eval_runtime": 694.652, |
|
"eval_samples_per_second": 4.195, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 6233 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"learning_rate": 0.001, |
|
"loss": 0.1514, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2656142759094029, |
|
"eval_f1_macro": 0.6476856744170203, |
|
"eval_f1_micro": 0.7877039652128988, |
|
"eval_loss": 0.15933051705360413, |
|
"eval_roc_auc": 0.854747407398644, |
|
"eval_runtime": 693.7997, |
|
"eval_samples_per_second": 4.2, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 6504 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.2656142759094029, |
|
"eval_f1_macro": 0.6476685424002145, |
|
"eval_f1_micro": 0.7909313518534156, |
|
"eval_loss": 0.1371144950389862, |
|
"eval_roc_auc": 0.8619333499582761, |
|
"eval_runtime": 697.1431, |
|
"eval_samples_per_second": 4.18, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 25.83, |
|
"learning_rate": 0.001, |
|
"loss": 0.1513, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.26664378860672616, |
|
"eval_f1_macro": 0.6272636530018297, |
|
"eval_f1_micro": 0.7871246489522575, |
|
"eval_loss": 0.13742324709892273, |
|
"eval_roc_auc": 0.8534693919460551, |
|
"eval_runtime": 694.4047, |
|
"eval_samples_per_second": 4.196, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 7046 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.26458476321207963, |
|
"eval_f1_macro": 0.6470102889103709, |
|
"eval_f1_micro": 0.7933764066578238, |
|
"eval_loss": 0.13733525574207306, |
|
"eval_roc_auc": 0.8595147784088812, |
|
"eval_runtime": 705.8388, |
|
"eval_samples_per_second": 4.128, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.001, |
|
"step": 7317 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 0.001, |
|
"loss": 0.1508, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.2735072065888813, |
|
"eval_f1_macro": 0.652320369204269, |
|
"eval_f1_micro": 0.7932826525791349, |
|
"eval_loss": 0.13527436554431915, |
|
"eval_roc_auc": 0.8584141813020159, |
|
"eval_runtime": 689.3753, |
|
"eval_samples_per_second": 4.227, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 7588 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.27762525737817434, |
|
"eval_f1_macro": 0.652221916857101, |
|
"eval_f1_micro": 0.7959942533592496, |
|
"eval_loss": 0.1361834853887558, |
|
"eval_roc_auc": 0.864468679187198, |
|
"eval_runtime": 711.9932, |
|
"eval_samples_per_second": 4.093, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 0.001, |
|
"step": 7859 |
|
}, |
|
{ |
|
"epoch": 29.52, |
|
"learning_rate": 0.001, |
|
"loss": 0.1506, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.25051475634866166, |
|
"eval_f1_macro": 0.6283472874420969, |
|
"eval_f1_micro": 0.784903397164032, |
|
"eval_loss": 0.13839198648929596, |
|
"eval_roc_auc": 0.8546034947804995, |
|
"eval_runtime": 711.3244, |
|
"eval_samples_per_second": 4.097, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 0.001, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.2717913520933425, |
|
"eval_f1_macro": 0.6629936811365008, |
|
"eval_f1_micro": 0.7963528413910094, |
|
"eval_loss": 0.13423041999340057, |
|
"eval_roc_auc": 0.8636329317556207, |
|
"eval_runtime": 717.6936, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.128, |
|
"learning_rate": 0.001, |
|
"step": 8401 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"learning_rate": 0.001, |
|
"loss": 0.151, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.2717913520933425, |
|
"eval_f1_macro": 0.655637689548565, |
|
"eval_f1_micro": 0.7967574308875494, |
|
"eval_loss": 0.13658006489276886, |
|
"eval_roc_auc": 0.8695589279972756, |
|
"eval_runtime": 709.164, |
|
"eval_samples_per_second": 4.109, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.001, |
|
"step": 8672 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.2824296499656829, |
|
"eval_f1_macro": 0.6635088812977026, |
|
"eval_f1_micro": 0.7984512261126608, |
|
"eval_loss": 0.13591675460338593, |
|
"eval_roc_auc": 0.8700680281449957, |
|
"eval_runtime": 711.6984, |
|
"eval_samples_per_second": 4.094, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 0.001, |
|
"step": 8943 |
|
}, |
|
{ |
|
"epoch": 33.21, |
|
"learning_rate": 0.001, |
|
"loss": 0.1507, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.28140013726835966, |
|
"eval_f1_macro": 0.640009703489063, |
|
"eval_f1_micro": 0.7998982533491605, |
|
"eval_loss": 0.13349105417728424, |
|
"eval_roc_auc": 0.8656642978283616, |
|
"eval_runtime": 702.7082, |
|
"eval_samples_per_second": 4.147, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.001, |
|
"step": 9214 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.272477693891558, |
|
"eval_f1_macro": 0.6519975762807232, |
|
"eval_f1_micro": 0.7962736584748978, |
|
"eval_loss": 0.13431623578071594, |
|
"eval_roc_auc": 0.8653056916528975, |
|
"eval_runtime": 695.9796, |
|
"eval_samples_per_second": 4.187, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 9485 |
|
}, |
|
{ |
|
"epoch": 35.06, |
|
"learning_rate": 0.001, |
|
"loss": 0.1495, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.26355525051475637, |
|
"eval_f1_macro": 0.6451443559854592, |
|
"eval_f1_micro": 0.7924289154590393, |
|
"eval_loss": 0.14291881024837494, |
|
"eval_roc_auc": 0.8625517456725394, |
|
"eval_runtime": 695.1357, |
|
"eval_samples_per_second": 4.192, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 9756 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.1496, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.2731640356897735, |
|
"eval_f1_macro": 0.653137873575441, |
|
"eval_f1_micro": 0.7981288539230278, |
|
"eval_loss": 0.13305164873600006, |
|
"eval_roc_auc": 0.8638193078136003, |
|
"eval_runtime": 697.8225, |
|
"eval_samples_per_second": 4.176, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 10027 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.26835964310226496, |
|
"eval_f1_macro": 0.6306015382070221, |
|
"eval_f1_micro": 0.7938126806051335, |
|
"eval_loss": 0.13497667014598846, |
|
"eval_roc_auc": 0.8616735348011272, |
|
"eval_runtime": 691.2148, |
|
"eval_samples_per_second": 4.216, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 10298 |
|
}, |
|
{ |
|
"epoch": 38.75, |
|
"learning_rate": 0.001, |
|
"loss": 0.1503, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.2800274536719286, |
|
"eval_f1_macro": 0.6464711728800093, |
|
"eval_f1_micro": 0.7983622472668946, |
|
"eval_loss": 0.1351996511220932, |
|
"eval_roc_auc": 0.866066786187883, |
|
"eval_runtime": 704.3219, |
|
"eval_samples_per_second": 4.137, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.001, |
|
"step": 10569 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.27282086479066575, |
|
"eval_f1_macro": 0.6271011637782951, |
|
"eval_f1_micro": 0.792462504807076, |
|
"eval_loss": 0.13469766080379486, |
|
"eval_roc_auc": 0.8594016621216213, |
|
"eval_runtime": 698.796, |
|
"eval_samples_per_second": 4.17, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 40.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.1505, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.2721345229924502, |
|
"eval_f1_macro": 0.6600650686410704, |
|
"eval_f1_micro": 0.7934581450398106, |
|
"eval_loss": 0.13396936655044556, |
|
"eval_roc_auc": 0.8579358774851804, |
|
"eval_runtime": 697.9239, |
|
"eval_samples_per_second": 4.175, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.001, |
|
"step": 11111 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.27110501029512696, |
|
"eval_f1_macro": 0.6636104085895331, |
|
"eval_f1_micro": 0.7982560108364375, |
|
"eval_loss": 0.13215309381484985, |
|
"eval_roc_auc": 0.8652014818411935, |
|
"eval_runtime": 682.8814, |
|
"eval_samples_per_second": 4.267, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.001, |
|
"step": 11382 |
|
}, |
|
{ |
|
"epoch": 42.44, |
|
"learning_rate": 0.001, |
|
"loss": 0.1491, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.2735072065888813, |
|
"eval_f1_macro": 0.6493239813433992, |
|
"eval_f1_micro": 0.7948717948717948, |
|
"eval_loss": 0.13603103160858154, |
|
"eval_roc_auc": 0.8634709756212143, |
|
"eval_runtime": 691.0843, |
|
"eval_samples_per_second": 4.217, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.001, |
|
"step": 11653 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.28140013726835966, |
|
"eval_f1_macro": 0.6400431467798345, |
|
"eval_f1_micro": 0.7955270207066627, |
|
"eval_loss": 0.13608315587043762, |
|
"eval_roc_auc": 0.8624513565815523, |
|
"eval_runtime": 684.569, |
|
"eval_samples_per_second": 4.257, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.001, |
|
"step": 11924 |
|
}, |
|
{ |
|
"epoch": 44.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.1507, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.28140013726835966, |
|
"eval_f1_macro": 0.6424034446314527, |
|
"eval_f1_micro": 0.7970817780794026, |
|
"eval_loss": 0.13283775746822357, |
|
"eval_roc_auc": 0.863982631455529, |
|
"eval_runtime": 683.5579, |
|
"eval_samples_per_second": 4.263, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.001, |
|
"step": 12195 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.2786547700754976, |
|
"eval_f1_macro": 0.6468829157126516, |
|
"eval_f1_micro": 0.7938707872422847, |
|
"eval_loss": 0.13275618851184845, |
|
"eval_roc_auc": 0.8581338694154104, |
|
"eval_runtime": 680.6581, |
|
"eval_samples_per_second": 4.281, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.001, |
|
"step": 12466 |
|
}, |
|
{ |
|
"epoch": 46.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.1495, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.27522306108442, |
|
"eval_f1_macro": 0.6351226382655659, |
|
"eval_f1_micro": 0.7977320453590928, |
|
"eval_loss": 0.133217915892601, |
|
"eval_roc_auc": 0.8671532629726034, |
|
"eval_runtime": 679.0852, |
|
"eval_samples_per_second": 4.291, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.001, |
|
"step": 12737 |
|
}, |
|
{ |
|
"epoch": 47.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.1498, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.2817433081674674, |
|
"eval_f1_macro": 0.6490013214958164, |
|
"eval_f1_micro": 0.8012753282711751, |
|
"eval_loss": 0.1325378566980362, |
|
"eval_roc_auc": 0.8694327770935429, |
|
"eval_runtime": 680.9219, |
|
"eval_samples_per_second": 4.279, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.001, |
|
"step": 13008 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.2882635552505148, |
|
"eval_f1_macro": 0.6738484837965685, |
|
"eval_f1_micro": 0.8061649892618015, |
|
"eval_loss": 0.12826864421367645, |
|
"eval_roc_auc": 0.8710295165867058, |
|
"eval_runtime": 681.5467, |
|
"eval_samples_per_second": 4.276, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 13279 |
|
}, |
|
{ |
|
"epoch": 49.82, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1416, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.2872340425531915, |
|
"eval_f1_macro": 0.6734470768849717, |
|
"eval_f1_micro": 0.8086902026321288, |
|
"eval_loss": 0.12865300476551056, |
|
"eval_roc_auc": 0.8747272405310621, |
|
"eval_runtime": 680.1838, |
|
"eval_samples_per_second": 4.284, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.2899794097460535, |
|
"eval_f1_macro": 0.6713719689925478, |
|
"eval_f1_micro": 0.8067049484884311, |
|
"eval_loss": 0.12803621590137482, |
|
"eval_roc_auc": 0.870555059108937, |
|
"eval_runtime": 682.2484, |
|
"eval_samples_per_second": 4.271, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 13821 |
|
}, |
|
{ |
|
"epoch": 51.66, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1387, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.2899794097460535, |
|
"eval_f1_macro": 0.6744045610607882, |
|
"eval_f1_micro": 0.8067475584492453, |
|
"eval_loss": 0.12618477642536163, |
|
"eval_roc_auc": 0.8701989061466676, |
|
"eval_runtime": 687.4248, |
|
"eval_samples_per_second": 4.239, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.0001, |
|
"step": 14092 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.2910089224433768, |
|
"eval_f1_macro": 0.6763777606257594, |
|
"eval_f1_micro": 0.8094476254631189, |
|
"eval_loss": 0.1262361854314804, |
|
"eval_roc_auc": 0.8728950289492735, |
|
"eval_runtime": 697.5202, |
|
"eval_samples_per_second": 4.178, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 14363 |
|
}, |
|
{ |
|
"epoch": 53.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1356, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.2947838023335621, |
|
"eval_f1_macro": 0.6743662487872923, |
|
"eval_f1_micro": 0.809105193867584, |
|
"eval_loss": 0.12573254108428955, |
|
"eval_roc_auc": 0.8701859740035948, |
|
"eval_runtime": 693.4876, |
|
"eval_samples_per_second": 4.202, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 14634 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.2947838023335621, |
|
"eval_f1_macro": 0.681373956781595, |
|
"eval_f1_micro": 0.8106019238039233, |
|
"eval_loss": 0.12566907703876495, |
|
"eval_roc_auc": 0.8742077383085138, |
|
"eval_runtime": 700.1378, |
|
"eval_samples_per_second": 4.162, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 14905 |
|
}, |
|
{ |
|
"epoch": 55.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1348, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.3009608785175017, |
|
"eval_f1_macro": 0.6772158941721765, |
|
"eval_f1_micro": 0.8107721439091101, |
|
"eval_loss": 0.12600058317184448, |
|
"eval_roc_auc": 0.873783436948714, |
|
"eval_runtime": 679.772, |
|
"eval_samples_per_second": 4.287, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 15176 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.29855868222374743, |
|
"eval_f1_macro": 0.6806264224832896, |
|
"eval_f1_micro": 0.8128972900635664, |
|
"eval_loss": 0.12499917298555374, |
|
"eval_roc_auc": 0.8767727269259479, |
|
"eval_runtime": 685.6404, |
|
"eval_samples_per_second": 4.25, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.0001, |
|
"step": 15447 |
|
}, |
|
{ |
|
"epoch": 57.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.135, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.3081674673987646, |
|
"eval_f1_macro": 0.6858667117679004, |
|
"eval_f1_micro": 0.8141957160856784, |
|
"eval_loss": 0.12423347681760788, |
|
"eval_roc_auc": 0.8762249584407599, |
|
"eval_runtime": 700.338, |
|
"eval_samples_per_second": 4.161, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 15718 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.3026767330130405, |
|
"eval_f1_macro": 0.6869701850967913, |
|
"eval_f1_micro": 0.8124398308986648, |
|
"eval_loss": 0.12452811747789383, |
|
"eval_roc_auc": 0.8762597292024611, |
|
"eval_runtime": 691.5065, |
|
"eval_samples_per_second": 4.214, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 15989 |
|
}, |
|
{ |
|
"epoch": 59.04, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1334, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.3030199039121482, |
|
"eval_f1_macro": 0.6853545892350839, |
|
"eval_f1_micro": 0.8137861803580391, |
|
"eval_loss": 0.1242317408323288, |
|
"eval_roc_auc": 0.8772149949469599, |
|
"eval_runtime": 700.2391, |
|
"eval_samples_per_second": 4.161, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 60.89, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1335, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.3064516129032258, |
|
"eval_f1_macro": 0.6889451502143565, |
|
"eval_f1_micro": 0.8139671855279764, |
|
"eval_loss": 0.12397264689207077, |
|
"eval_roc_auc": 0.8756026651507299, |
|
"eval_runtime": 691.2006, |
|
"eval_samples_per_second": 4.216, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 16531 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.3016472203157172, |
|
"eval_f1_macro": 0.6808873228168837, |
|
"eval_f1_micro": 0.8152336604024614, |
|
"eval_loss": 0.12486530840396881, |
|
"eval_roc_auc": 0.8798070104112615, |
|
"eval_runtime": 696.5149, |
|
"eval_samples_per_second": 4.184, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 16802 |
|
}, |
|
{ |
|
"epoch": 62.73, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1308, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.30679478380233355, |
|
"eval_f1_macro": 0.6848490082628171, |
|
"eval_f1_micro": 0.814602720114531, |
|
"eval_loss": 0.1233312338590622, |
|
"eval_roc_auc": 0.8756913615144166, |
|
"eval_runtime": 687.8446, |
|
"eval_samples_per_second": 4.236, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.0001, |
|
"step": 17073 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.3057652711050103, |
|
"eval_f1_macro": 0.6908433037124228, |
|
"eval_f1_micro": 0.8151443922095367, |
|
"eval_loss": 0.12344498932361603, |
|
"eval_roc_auc": 0.8769218021539817, |
|
"eval_runtime": 702.1676, |
|
"eval_samples_per_second": 4.15, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 17344 |
|
}, |
|
{ |
|
"epoch": 64.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1326, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.303363074811256, |
|
"eval_f1_macro": 0.6812168886607934, |
|
"eval_f1_micro": 0.8124392614188533, |
|
"eval_loss": 0.12330327183008194, |
|
"eval_roc_auc": 0.8734876628507912, |
|
"eval_runtime": 680.7902, |
|
"eval_samples_per_second": 4.28, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 17615 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.3026767330130405, |
|
"eval_f1_macro": 0.6878311502092693, |
|
"eval_f1_micro": 0.8144785071642787, |
|
"eval_loss": 0.12320297956466675, |
|
"eval_roc_auc": 0.8788401544128625, |
|
"eval_runtime": 690.4713, |
|
"eval_samples_per_second": 4.22, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 17886 |
|
}, |
|
{ |
|
"epoch": 66.42, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1306, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.3074811256005491, |
|
"eval_f1_macro": 0.6857441975499322, |
|
"eval_f1_micro": 0.8115152031343156, |
|
"eval_loss": 0.1227714866399765, |
|
"eval_roc_auc": 0.8706963391854371, |
|
"eval_runtime": 709.7385, |
|
"eval_samples_per_second": 4.106, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.0001, |
|
"step": 18157 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.3074811256005491, |
|
"eval_f1_macro": 0.6913054019394733, |
|
"eval_f1_micro": 0.8153315962007229, |
|
"eval_loss": 0.12259615212678909, |
|
"eval_roc_auc": 0.8766517898688044, |
|
"eval_runtime": 686.725, |
|
"eval_samples_per_second": 4.243, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.0001, |
|
"step": 18428 |
|
}, |
|
{ |
|
"epoch": 68.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1299, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.30851063829787234, |
|
"eval_f1_macro": 0.676353464691654, |
|
"eval_f1_micro": 0.8143055965585593, |
|
"eval_loss": 0.12271784245967865, |
|
"eval_roc_auc": 0.8750562129363642, |
|
"eval_runtime": 693.9986, |
|
"eval_samples_per_second": 4.199, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 18699 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.31056966369251887, |
|
"eval_f1_macro": 0.6999234521712909, |
|
"eval_f1_micro": 0.81868109179502, |
|
"eval_loss": 0.12295936793088913, |
|
"eval_roc_auc": 0.8837914226495347, |
|
"eval_runtime": 690.298, |
|
"eval_samples_per_second": 4.221, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 18970 |
|
}, |
|
{ |
|
"epoch": 70.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1295, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.30679478380233355, |
|
"eval_f1_macro": 0.6893382907323766, |
|
"eval_f1_micro": 0.8152834008097165, |
|
"eval_loss": 0.12247700244188309, |
|
"eval_roc_auc": 0.8756051952189744, |
|
"eval_runtime": 708.7396, |
|
"eval_samples_per_second": 4.112, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 0.0001, |
|
"step": 19241 |
|
}, |
|
{ |
|
"epoch": 71.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1289, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.30370624571036375, |
|
"eval_f1_macro": 0.6867583154614194, |
|
"eval_f1_micro": 0.815056734916049, |
|
"eval_loss": 0.12231950461864471, |
|
"eval_roc_auc": 0.877607106850003, |
|
"eval_runtime": 677.1006, |
|
"eval_samples_per_second": 4.304, |
|
"eval_steps_per_second": 0.136, |
|
"learning_rate": 0.0001, |
|
"step": 19512 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.30542210020590255, |
|
"eval_f1_macro": 0.6917904729736315, |
|
"eval_f1_micro": 0.8165095327886026, |
|
"eval_loss": 0.12229206413030624, |
|
"eval_roc_auc": 0.8781856460477259, |
|
"eval_runtime": 692.4475, |
|
"eval_samples_per_second": 4.208, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 19783 |
|
}, |
|
{ |
|
"epoch": 73.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1279, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.30542210020590255, |
|
"eval_f1_macro": 0.6855916835036744, |
|
"eval_f1_micro": 0.8142863173892853, |
|
"eval_loss": 0.12248736619949341, |
|
"eval_roc_auc": 0.8747476347871186, |
|
"eval_runtime": 687.0839, |
|
"eval_samples_per_second": 4.241, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.0001, |
|
"step": 20054 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.31022649279341113, |
|
"eval_f1_macro": 0.6878236573831232, |
|
"eval_f1_micro": 0.8167385749591589, |
|
"eval_loss": 0.12206920981407166, |
|
"eval_roc_auc": 0.8784119298589457, |
|
"eval_runtime": 681.8157, |
|
"eval_samples_per_second": 4.274, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 20325 |
|
}, |
|
{ |
|
"epoch": 75.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1276, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.31674673987645846, |
|
"eval_f1_macro": 0.6963741846422794, |
|
"eval_f1_micro": 0.8190190440471725, |
|
"eval_loss": 0.12172180414199829, |
|
"eval_roc_auc": 0.8812250665245784, |
|
"eval_runtime": 672.3327, |
|
"eval_samples_per_second": 4.334, |
|
"eval_steps_per_second": 0.137, |
|
"learning_rate": 0.0001, |
|
"step": 20596 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.31022649279341113, |
|
"eval_f1_macro": 0.6940319651093022, |
|
"eval_f1_micro": 0.8179309191268713, |
|
"eval_loss": 0.12170004099607468, |
|
"eval_roc_auc": 0.8795832054315861, |
|
"eval_runtime": 683.2195, |
|
"eval_samples_per_second": 4.265, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 20867 |
|
}, |
|
{ |
|
"epoch": 77.49, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1274, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.3081674673987646, |
|
"eval_f1_macro": 0.6858950802374723, |
|
"eval_f1_micro": 0.8143468859965235, |
|
"eval_loss": 0.12156965583562851, |
|
"eval_roc_auc": 0.8735390327661822, |
|
"eval_runtime": 682.8416, |
|
"eval_samples_per_second": 4.267, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 21138 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.31468771448181193, |
|
"eval_f1_macro": 0.6944979922976369, |
|
"eval_f1_micro": 0.8164588948787063, |
|
"eval_loss": 0.1215372309088707, |
|
"eval_roc_auc": 0.8766491829455099, |
|
"eval_runtime": 683.544, |
|
"eval_samples_per_second": 4.263, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 21409 |
|
}, |
|
{ |
|
"epoch": 79.34, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1269, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.31468771448181193, |
|
"eval_f1_macro": 0.6999268076002686, |
|
"eval_f1_micro": 0.8192881937183724, |
|
"eval_loss": 0.12135831266641617, |
|
"eval_roc_auc": 0.8802967507718976, |
|
"eval_runtime": 676.6048, |
|
"eval_samples_per_second": 4.307, |
|
"eval_steps_per_second": 0.136, |
|
"learning_rate": 0.0001, |
|
"step": 21680 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.3112560054907344, |
|
"eval_f1_macro": 0.697411996468389, |
|
"eval_f1_micro": 0.81943004106691, |
|
"eval_loss": 0.12142007052898407, |
|
"eval_roc_auc": 0.8828322337109193, |
|
"eval_runtime": 681.4379, |
|
"eval_samples_per_second": 4.276, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 21951 |
|
}, |
|
{ |
|
"epoch": 81.18, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1259, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.31022649279341113, |
|
"eval_f1_macro": 0.695644238424867, |
|
"eval_f1_micro": 0.817148370317547, |
|
"eval_loss": 0.12121300399303436, |
|
"eval_roc_auc": 0.8782139123103657, |
|
"eval_runtime": 687.8898, |
|
"eval_samples_per_second": 4.236, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.0001, |
|
"step": 22222 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.31228551818805766, |
|
"eval_f1_macro": 0.6969593372601354, |
|
"eval_f1_micro": 0.8189676877885018, |
|
"eval_loss": 0.12076255679130554, |
|
"eval_roc_auc": 0.8791048759964009, |
|
"eval_runtime": 683.6587, |
|
"eval_samples_per_second": 4.262, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 22493 |
|
}, |
|
{ |
|
"epoch": 83.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1258, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.31537405628002746, |
|
"eval_f1_macro": 0.699678101503351, |
|
"eval_f1_micro": 0.8203842940685045, |
|
"eval_loss": 0.12093241512775421, |
|
"eval_roc_auc": 0.8812674531093634, |
|
"eval_runtime": 674.7538, |
|
"eval_samples_per_second": 4.319, |
|
"eval_steps_per_second": 0.136, |
|
"learning_rate": 0.0001, |
|
"step": 22764 |
|
}, |
|
{ |
|
"epoch": 84.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1251, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.3064516129032258, |
|
"eval_f1_macro": 0.6934546509096284, |
|
"eval_f1_micro": 0.8163317114448911, |
|
"eval_loss": 0.12106911092996597, |
|
"eval_roc_auc": 0.875220724757365, |
|
"eval_runtime": 682.5674, |
|
"eval_samples_per_second": 4.269, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 23035 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.31537405628002746, |
|
"eval_f1_macro": 0.6971612900401489, |
|
"eval_f1_micro": 0.8200962947456564, |
|
"eval_loss": 0.12031004577875137, |
|
"eval_roc_auc": 0.8804175853556411, |
|
"eval_runtime": 685.5285, |
|
"eval_samples_per_second": 4.251, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.0001, |
|
"step": 23306 |
|
}, |
|
{ |
|
"epoch": 86.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1251, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.3150308853809197, |
|
"eval_f1_macro": 0.6946815591935039, |
|
"eval_f1_micro": 0.8182238085240395, |
|
"eval_loss": 0.12081247568130493, |
|
"eval_roc_auc": 0.8785070282482941, |
|
"eval_runtime": 683.1403, |
|
"eval_samples_per_second": 4.266, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 23577 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.31537405628002746, |
|
"eval_f1_macro": 0.6936867859001314, |
|
"eval_f1_micro": 0.8180521768308028, |
|
"eval_loss": 0.12136104702949524, |
|
"eval_roc_auc": 0.8787500393049534, |
|
"eval_runtime": 674.7599, |
|
"eval_samples_per_second": 4.319, |
|
"eval_steps_per_second": 0.136, |
|
"learning_rate": 0.0001, |
|
"step": 23848 |
|
}, |
|
{ |
|
"epoch": 88.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1246, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.31056966369251887, |
|
"eval_f1_macro": 0.6953152626493269, |
|
"eval_f1_micro": 0.8201318053981447, |
|
"eval_loss": 0.12058280408382416, |
|
"eval_roc_auc": 0.8796589952705502, |
|
"eval_runtime": 682.7847, |
|
"eval_samples_per_second": 4.268, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 24119 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.3164035689773507, |
|
"eval_f1_macro": 0.696027478931947, |
|
"eval_f1_micro": 0.8213629530649741, |
|
"eval_loss": 0.12102781236171722, |
|
"eval_roc_auc": 0.8819102943655497, |
|
"eval_runtime": 686.1462, |
|
"eval_samples_per_second": 4.247, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 0.0001, |
|
"step": 24390 |
|
}, |
|
{ |
|
"epoch": 90.41, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1239, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.31537405628002746, |
|
"eval_f1_macro": 0.7005956031917913, |
|
"eval_f1_micro": 0.8201800293070964, |
|
"eval_loss": 0.1198815330862999, |
|
"eval_roc_auc": 0.8804638695161929, |
|
"eval_runtime": 683.8668, |
|
"eval_samples_per_second": 4.261, |
|
"eval_steps_per_second": 0.135, |
|
"learning_rate": 0.0001, |
|
"step": 24661 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.316060398078243, |
|
"eval_f1_macro": 0.703909588701163, |
|
"eval_f1_micro": 0.8221735718121359, |
|
"eval_loss": 0.12077653408050537, |
|
"eval_roc_auc": 0.8856479260631466, |
|
"eval_runtime": 672.8479, |
|
"eval_samples_per_second": 4.331, |
|
"eval_steps_per_second": 0.137, |
|
"learning_rate": 0.0001, |
|
"step": 24932 |
|
}, |
|
{ |
|
"epoch": 92.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1238, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.31331503088538093, |
|
"eval_f1_macro": 0.7003774810286753, |
|
"eval_f1_micro": 0.8199147228492601, |
|
"eval_loss": 0.12042003870010376, |
|
"eval_roc_auc": 0.8807814566143944, |
|
"eval_runtime": 693.4839, |
|
"eval_samples_per_second": 4.202, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 0.0001, |
|
"step": 25203 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.3143445435827042, |
|
"eval_f1_macro": 0.7036330518306199, |
|
"eval_f1_micro": 0.8230411686586985, |
|
"eval_loss": 0.1200033500790596, |
|
"eval_roc_auc": 0.8847080200565651, |
|
"eval_runtime": 700.664, |
|
"eval_samples_per_second": 4.159, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 25474 |
|
}, |
|
{ |
|
"epoch": 94.1, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1237, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.318805765271105, |
|
"eval_f1_macro": 0.7068704040733741, |
|
"eval_f1_micro": 0.8209067379143359, |
|
"eval_loss": 0.12061866372823715, |
|
"eval_roc_auc": 0.8817328760198262, |
|
"eval_runtime": 701.3444, |
|
"eval_samples_per_second": 4.155, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 25745 |
|
}, |
|
{ |
|
"epoch": 95.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1234, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.31468771448181193, |
|
"eval_f1_macro": 0.7059911076490224, |
|
"eval_f1_micro": 0.8222408026755854, |
|
"eval_loss": 0.12005680054426193, |
|
"eval_roc_auc": 0.8820439842373163, |
|
"eval_runtime": 697.7134, |
|
"eval_samples_per_second": 4.177, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 0.0001, |
|
"step": 26016 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.30919698009608787, |
|
"eval_f1_macro": 0.7073615141822082, |
|
"eval_f1_micro": 0.8207813798836243, |
|
"eval_loss": 0.12039094418287277, |
|
"eval_roc_auc": 0.882995480497798, |
|
"eval_runtime": 701.6443, |
|
"eval_samples_per_second": 4.153, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 0.0001, |
|
"step": 26287 |
|
}, |
|
{ |
|
"epoch": 97.79, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1215, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.318805765271105, |
|
"eval_f1_macro": 0.7125305658957566, |
|
"eval_f1_micro": 0.8240848103362597, |
|
"eval_loss": 0.12003627419471741, |
|
"eval_roc_auc": 0.8859474440267361, |
|
"eval_runtime": 703.1401, |
|
"eval_samples_per_second": 4.144, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 26558 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.31708991077556625, |
|
"eval_f1_macro": 0.7126519915135826, |
|
"eval_f1_micro": 0.8246739805423309, |
|
"eval_loss": 0.11952651292085648, |
|
"eval_roc_auc": 0.8863652921241748, |
|
"eval_runtime": 704.3933, |
|
"eval_samples_per_second": 4.137, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 26829 |
|
}, |
|
{ |
|
"epoch": 99.63, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1208, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.3164035689773507, |
|
"eval_f1_macro": 0.7076695321633534, |
|
"eval_f1_micro": 0.8225050234427328, |
|
"eval_loss": 0.11920821666717529, |
|
"eval_roc_auc": 0.8817868272401618, |
|
"eval_runtime": 693.6368, |
|
"eval_samples_per_second": 4.201, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1e-05, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.31708991077556625, |
|
"eval_f1_macro": 0.706035453076238, |
|
"eval_f1_micro": 0.8232053422370618, |
|
"eval_loss": 0.11927199363708496, |
|
"eval_roc_auc": 0.8830828349723125, |
|
"eval_runtime": 710.5245, |
|
"eval_samples_per_second": 4.101, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 1e-05, |
|
"step": 27371 |
|
}, |
|
{ |
|
"epoch": 101.48, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1195, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.31846259437199725, |
|
"eval_f1_macro": 0.710532612125401, |
|
"eval_f1_micro": 0.8237933039793969, |
|
"eval_loss": 0.11972019821405411, |
|
"eval_roc_auc": 0.8848494282248152, |
|
"eval_runtime": 702.1024, |
|
"eval_samples_per_second": 4.15, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 27642 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.31400137268359646, |
|
"eval_f1_macro": 0.7075672175938867, |
|
"eval_f1_micro": 0.8215544737283866, |
|
"eval_loss": 0.11914487928152084, |
|
"eval_roc_auc": 0.880506256100978, |
|
"eval_runtime": 705.9918, |
|
"eval_samples_per_second": 4.128, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1e-05, |
|
"step": 27913 |
|
}, |
|
{ |
|
"epoch": 103.32, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1197, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.32017844886753605, |
|
"eval_f1_macro": 0.7063140405914397, |
|
"eval_f1_micro": 0.8238771177621446, |
|
"eval_loss": 0.11925092339515686, |
|
"eval_roc_auc": 0.8842515548583585, |
|
"eval_runtime": 693.831, |
|
"eval_samples_per_second": 4.2, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1e-05, |
|
"step": 28184 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.3126286890871654, |
|
"eval_f1_macro": 0.7070687169886732, |
|
"eval_f1_micro": 0.8213039640169827, |
|
"eval_loss": 0.11896480619907379, |
|
"eval_roc_auc": 0.8798775351669367, |
|
"eval_runtime": 704.4845, |
|
"eval_samples_per_second": 4.136, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 28455 |
|
}, |
|
{ |
|
"epoch": 105.17, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1189, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.32017844886753605, |
|
"eval_f1_macro": 0.7060565909099816, |
|
"eval_f1_micro": 0.8232792762746488, |
|
"eval_loss": 0.11903885006904602, |
|
"eval_roc_auc": 0.8834646985106276, |
|
"eval_runtime": 702.4127, |
|
"eval_samples_per_second": 4.149, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 28726 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.3164035689773507, |
|
"eval_f1_macro": 0.7038085792556672, |
|
"eval_f1_micro": 0.822364833689862, |
|
"eval_loss": 0.1193847730755806, |
|
"eval_roc_auc": 0.8811169591371075, |
|
"eval_runtime": 704.7837, |
|
"eval_samples_per_second": 4.135, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 28997 |
|
}, |
|
{ |
|
"epoch": 107.01, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1194, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.3191489361702128, |
|
"eval_f1_macro": 0.7110378528873161, |
|
"eval_f1_micro": 0.8232445520581114, |
|
"eval_loss": 0.11912781000137329, |
|
"eval_roc_auc": 0.883028832515277, |
|
"eval_runtime": 712.626, |
|
"eval_samples_per_second": 4.089, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 1e-05, |
|
"step": 29268 |
|
}, |
|
{ |
|
"epoch": 108.86, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1187, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.317433081674674, |
|
"eval_f1_macro": 0.7101233565606322, |
|
"eval_f1_micro": 0.8229869639937963, |
|
"eval_loss": 0.11885793507099152, |
|
"eval_roc_auc": 0.8816620950806512, |
|
"eval_runtime": 704.8781, |
|
"eval_samples_per_second": 4.134, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 29539 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.316060398078243, |
|
"eval_f1_macro": 0.7043927351077296, |
|
"eval_f1_micro": 0.8223593100843511, |
|
"eval_loss": 0.11920594424009323, |
|
"eval_roc_auc": 0.8809626700624849, |
|
"eval_runtime": 698.225, |
|
"eval_samples_per_second": 4.173, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1e-05, |
|
"step": 29810 |
|
}, |
|
{ |
|
"epoch": 110.7, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1185, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.317433081674674, |
|
"eval_f1_macro": 0.7082791431366098, |
|
"eval_f1_micro": 0.8226405643444505, |
|
"eval_loss": 0.1191883385181427, |
|
"eval_roc_auc": 0.8827318446199321, |
|
"eval_runtime": 708.9619, |
|
"eval_samples_per_second": 4.11, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1e-05, |
|
"step": 30081 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.3205216197666438, |
|
"eval_f1_macro": 0.7093446393354451, |
|
"eval_f1_micro": 0.8239410221167062, |
|
"eval_loss": 0.11904006451368332, |
|
"eval_roc_auc": 0.8841101210717583, |
|
"eval_runtime": 709.1728, |
|
"eval_samples_per_second": 4.109, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1e-05, |
|
"step": 30352 |
|
}, |
|
{ |
|
"epoch": 112.55, |
|
"learning_rate": 1e-05, |
|
"loss": 0.119, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.31708991077556625, |
|
"eval_f1_macro": 0.708022770004008, |
|
"eval_f1_micro": 0.8232849960526862, |
|
"eval_loss": 0.11945341527462006, |
|
"eval_roc_auc": 0.8844778642879284, |
|
"eval_runtime": 708.0786, |
|
"eval_samples_per_second": 4.115, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1e-05, |
|
"step": 30623 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.3181194234728895, |
|
"eval_f1_macro": 0.7061751648131201, |
|
"eval_f1_micro": 0.82203889216264, |
|
"eval_loss": 0.11900585889816284, |
|
"eval_roc_auc": 0.8798916554890817, |
|
"eval_runtime": 701.8292, |
|
"eval_samples_per_second": 4.152, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1e-05, |
|
"step": 30894 |
|
}, |
|
{ |
|
"epoch": 114.39, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1182, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.317433081674674, |
|
"eval_f1_macro": 0.7081036238174518, |
|
"eval_f1_micro": 0.8228996779994145, |
|
"eval_loss": 0.11918609589338303, |
|
"eval_roc_auc": 0.8823203985481548, |
|
"eval_runtime": 711.6073, |
|
"eval_samples_per_second": 4.095, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 1e-05, |
|
"step": 31165 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.32498284145504464, |
|
"eval_f1_macro": 0.7128134624321966, |
|
"eval_f1_micro": 0.8255601659751037, |
|
"eval_loss": 0.11899947375059128, |
|
"eval_roc_auc": 0.8861634026181779, |
|
"eval_runtime": 692.0205, |
|
"eval_samples_per_second": 4.211, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 31436 |
|
}, |
|
{ |
|
"epoch": 116.24, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1191, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.31708991077556625, |
|
"eval_f1_macro": 0.7103576429034303, |
|
"eval_f1_micro": 0.8231059020510674, |
|
"eval_loss": 0.11870068311691284, |
|
"eval_roc_auc": 0.8821442452365535, |
|
"eval_runtime": 704.8278, |
|
"eval_samples_per_second": 4.134, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 31707 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.31983527796842826, |
|
"eval_f1_macro": 0.7061058223171173, |
|
"eval_f1_micro": 0.8235982284616027, |
|
"eval_loss": 0.11885705590248108, |
|
"eval_roc_auc": 0.8829722487532972, |
|
"eval_runtime": 702.381, |
|
"eval_samples_per_second": 4.149, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 31978 |
|
}, |
|
{ |
|
"epoch": 118.08, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1179, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.3181194234728895, |
|
"eval_f1_macro": 0.7080350612891082, |
|
"eval_f1_micro": 0.8233181609387398, |
|
"eval_loss": 0.11888550966978073, |
|
"eval_roc_auc": 0.882981257702253, |
|
"eval_runtime": 695.5592, |
|
"eval_samples_per_second": 4.189, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 32249 |
|
}, |
|
{ |
|
"epoch": 119.93, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1176, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.31846259437199725, |
|
"eval_f1_macro": 0.710061474614843, |
|
"eval_f1_micro": 0.8238579526508836, |
|
"eval_loss": 0.11897823214530945, |
|
"eval_roc_auc": 0.883849117735537, |
|
"eval_runtime": 696.3539, |
|
"eval_samples_per_second": 4.185, |
|
"eval_steps_per_second": 0.132, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 32520 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.3208647906657515, |
|
"eval_f1_macro": 0.7127667221711917, |
|
"eval_f1_micro": 0.8254309453929147, |
|
"eval_loss": 0.11947356164455414, |
|
"eval_roc_auc": 0.8872447167930093, |
|
"eval_runtime": 693.3862, |
|
"eval_samples_per_second": 4.203, |
|
"eval_steps_per_second": 0.133, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 32791 |
|
}, |
|
{ |
|
"epoch": 121.77, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1175, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.31537405628002746, |
|
"eval_f1_macro": 0.7048262553703367, |
|
"eval_f1_micro": 0.8222669349429913, |
|
"eval_loss": 0.11920282989740372, |
|
"eval_roc_auc": 0.8812519652796962, |
|
"eval_runtime": 684.1671, |
|
"eval_samples_per_second": 4.259, |
|
"eval_steps_per_second": 0.134, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 33062 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.3212079615648593, |
|
"eval_f1_macro": 0.7154256602927447, |
|
"eval_f1_micro": 0.8255195344970907, |
|
"eval_loss": 0.1192421168088913, |
|
"eval_roc_auc": 0.8856336776492516, |
|
"eval_runtime": 707.8447, |
|
"eval_samples_per_second": 4.117, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 33333 |
|
}, |
|
{ |
|
"epoch": 123.62, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.1176, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.3208647906657515, |
|
"eval_f1_macro": 0.7109465417900758, |
|
"eval_f1_micro": 0.8238532110091744, |
|
"eval_loss": 0.11888780444860458, |
|
"eval_roc_auc": 0.8836948286609143, |
|
"eval_runtime": 715.7035, |
|
"eval_samples_per_second": 4.072, |
|
"eval_steps_per_second": 0.129, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 33604 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.3225806451612903, |
|
"eval_f1_macro": 0.7101964285625825, |
|
"eval_f1_micro": 0.8251958006998834, |
|
"eval_loss": 0.11894452571868896, |
|
"eval_roc_auc": 0.8847439533789889, |
|
"eval_runtime": 706.7139, |
|
"eval_samples_per_second": 4.123, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 33875 |
|
}, |
|
{ |
|
"epoch": 125.46, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.1179, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.3164035689773507, |
|
"eval_f1_macro": 0.702507166276965, |
|
"eval_f1_micro": 0.8206187305066173, |
|
"eval_loss": 0.11890433728694916, |
|
"eval_roc_auc": 0.8787088152808908, |
|
"eval_runtime": 705.4083, |
|
"eval_samples_per_second": 4.131, |
|
"eval_steps_per_second": 0.13, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 34146 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.32155113246396705, |
|
"eval_f1_macro": 0.7103986965520878, |
|
"eval_f1_micro": 0.8245357813477989, |
|
"eval_loss": 0.11901579052209854, |
|
"eval_roc_auc": 0.8838838116421884, |
|
"eval_runtime": 704.1405, |
|
"eval_samples_per_second": 4.138, |
|
"eval_steps_per_second": 0.131, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 34417 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 34417, |
|
"total_flos": 1.6323301632319567e+21, |
|
"train_loss": 0.0713793940258306, |
|
"train_runtime": 211000.3165, |
|
"train_samples_per_second": 6.162, |
|
"train_steps_per_second": 0.193 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40650, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"total_flos": 1.6323301632319567e+21, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|