|
{ |
|
"best_metric": 0.1039666160941124, |
|
"best_model_checkpoint": "./dino-base-2023_11_27-with_custom_head/checkpoint-43952", |
|
"epoch": 90.0, |
|
"eval_steps": 500, |
|
"global_step": 48240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.01, |
|
"loss": 0.2471, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.39442658092175775, |
|
"eval_f1_macro": 0.40064106475011957, |
|
"eval_f1_micro": 0.5684482898035094, |
|
"eval_loss": 0.2136440873146057, |
|
"eval_roc_auc": 0.7020299868465498, |
|
"eval_runtime": 648.3221, |
|
"eval_samples_per_second": 4.317, |
|
"eval_steps_per_second": 0.27, |
|
"learning_rate": 0.01, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.01, |
|
"loss": 0.2208, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4090746695248303, |
|
"eval_f1_macro": 0.646218183693934, |
|
"eval_f1_micro": 0.6908665105386417, |
|
"eval_loss": 0.21994435787200928, |
|
"eval_roc_auc": 0.7944947312416977, |
|
"eval_runtime": 649.3168, |
|
"eval_samples_per_second": 4.311, |
|
"eval_steps_per_second": 0.27, |
|
"learning_rate": 0.01, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.01, |
|
"loss": 0.2181, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.43122543765630583, |
|
"eval_f1_macro": 0.5770940809584216, |
|
"eval_f1_micro": 0.6712328767123288, |
|
"eval_loss": 0.19793672859668732, |
|
"eval_roc_auc": 0.7666801441240528, |
|
"eval_runtime": 645.3042, |
|
"eval_samples_per_second": 4.337, |
|
"eval_steps_per_second": 0.271, |
|
"learning_rate": 0.01, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.01, |
|
"loss": 0.2187, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4137191854233655, |
|
"eval_f1_macro": 0.606113442290164, |
|
"eval_f1_micro": 0.7318466985527052, |
|
"eval_loss": 0.1766517609357834, |
|
"eval_roc_auc": 0.8268705246501159, |
|
"eval_runtime": 668.3118, |
|
"eval_samples_per_second": 4.188, |
|
"eval_steps_per_second": 0.262, |
|
"learning_rate": 0.01, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.01, |
|
"loss": 0.2128, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.43265451947123973, |
|
"eval_f1_macro": 0.5526692766303807, |
|
"eval_f1_micro": 0.7173007124613524, |
|
"eval_loss": 0.17600227892398834, |
|
"eval_roc_auc": 0.8011887560107374, |
|
"eval_runtime": 647.0188, |
|
"eval_samples_per_second": 4.326, |
|
"eval_steps_per_second": 0.27, |
|
"learning_rate": 0.01, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.01, |
|
"loss": 0.2171, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4226509467667024, |
|
"eval_f1_macro": 0.5183527283365527, |
|
"eval_f1_micro": 0.7108839135498899, |
|
"eval_loss": 0.1992170512676239, |
|
"eval_roc_auc": 0.8061565099276957, |
|
"eval_runtime": 650.6673, |
|
"eval_samples_per_second": 4.302, |
|
"eval_steps_per_second": 0.269, |
|
"learning_rate": 0.01, |
|
"step": 3216 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.01, |
|
"loss": 0.2108, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.41729188996070027, |
|
"eval_f1_macro": 0.6054999995580052, |
|
"eval_f1_micro": 0.7338740337092892, |
|
"eval_loss": 0.16948619484901428, |
|
"eval_roc_auc": 0.8232094683978447, |
|
"eval_runtime": 649.3312, |
|
"eval_samples_per_second": 4.311, |
|
"eval_steps_per_second": 0.27, |
|
"learning_rate": 0.01, |
|
"step": 3752 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.01, |
|
"loss": 0.2147, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4226509467667024, |
|
"eval_f1_macro": 0.6176021212641017, |
|
"eval_f1_micro": 0.7440822902466228, |
|
"eval_loss": 0.16185873746871948, |
|
"eval_roc_auc": 0.8341874074405569, |
|
"eval_runtime": 663.7112, |
|
"eval_samples_per_second": 4.217, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.01, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.01, |
|
"loss": 0.2112, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.43086816720257237, |
|
"eval_f1_macro": 0.6402287128969387, |
|
"eval_f1_micro": 0.7336987336987336, |
|
"eval_loss": 0.17079614102840424, |
|
"eval_roc_auc": 0.8244850070361233, |
|
"eval_runtime": 656.7931, |
|
"eval_samples_per_second": 4.262, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.01, |
|
"step": 4824 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.01, |
|
"loss": 0.216, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.42836727402643804, |
|
"eval_f1_macro": 0.643393584513474, |
|
"eval_f1_micro": 0.7493341591824095, |
|
"eval_loss": 0.1750514954328537, |
|
"eval_roc_auc": 0.83718619371997, |
|
"eval_runtime": 657.1366, |
|
"eval_samples_per_second": 4.259, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.01, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 0.01, |
|
"loss": 0.2151, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.43372633083244017, |
|
"eval_f1_macro": 0.6292914445814428, |
|
"eval_f1_micro": 0.7241767706883986, |
|
"eval_loss": 0.17014054954051971, |
|
"eval_roc_auc": 0.8078434001509203, |
|
"eval_runtime": 662.7207, |
|
"eval_samples_per_second": 4.223, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.01, |
|
"step": 5896 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 0.01, |
|
"loss": 0.213, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.3565559128260093, |
|
"eval_f1_macro": 0.5371100777543761, |
|
"eval_f1_micro": 0.6629161350191394, |
|
"eval_loss": 0.2409001588821411, |
|
"eval_roc_auc": 0.8034829311226885, |
|
"eval_runtime": 657.3534, |
|
"eval_samples_per_second": 4.258, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.01, |
|
"step": 6432 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 0.01, |
|
"loss": 0.2161, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4194355126831011, |
|
"eval_f1_macro": 0.6243034191518838, |
|
"eval_f1_micro": 0.7290690310322989, |
|
"eval_loss": 0.1704823523759842, |
|
"eval_roc_auc": 0.820743148916754, |
|
"eval_runtime": 665.336, |
|
"eval_samples_per_second": 4.207, |
|
"eval_steps_per_second": 0.263, |
|
"learning_rate": 0.01, |
|
"step": 6968 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 0.01, |
|
"loss": 0.2145, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.01, |
|
"loss": 0.2136, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.44265809217577706, |
|
"eval_f1_macro": 0.6215367292632343, |
|
"eval_f1_micro": 0.734259379462547, |
|
"eval_loss": 0.16933664679527283, |
|
"eval_roc_auc": 0.8177495297062425, |
|
"eval_runtime": 654.4339, |
|
"eval_samples_per_second": 4.277, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.01, |
|
"step": 7504 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 0.001, |
|
"loss": 0.1826, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.4969632011432655, |
|
"eval_f1_macro": 0.7459554779815883, |
|
"eval_f1_micro": 0.7967720076387605, |
|
"eval_loss": 0.13880470395088196, |
|
"eval_roc_auc": 0.8648126874646008, |
|
"eval_runtime": 668.3546, |
|
"eval_samples_per_second": 4.188, |
|
"eval_steps_per_second": 0.262, |
|
"learning_rate": 0.001, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 0.001, |
|
"loss": 0.1731, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5155412647374062, |
|
"eval_f1_macro": 0.7385190479208986, |
|
"eval_f1_micro": 0.8016270337922403, |
|
"eval_loss": 0.14762958884239197, |
|
"eval_roc_auc": 0.8631052728177114, |
|
"eval_runtime": 657.5159, |
|
"eval_samples_per_second": 4.257, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.001, |
|
"step": 8576 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.1649, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5023222579492675, |
|
"eval_f1_macro": 0.7692855132157861, |
|
"eval_f1_micro": 0.813318908522659, |
|
"eval_loss": 0.1351146697998047, |
|
"eval_roc_auc": 0.8811667807242409, |
|
"eval_runtime": 655.4487, |
|
"eval_samples_per_second": 4.27, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.001, |
|
"step": 9112 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.1624, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5212575919971418, |
|
"eval_f1_macro": 0.7714136364578609, |
|
"eval_f1_micro": 0.8185593067340675, |
|
"eval_loss": 0.13848340511322021, |
|
"eval_roc_auc": 0.8837626029149266, |
|
"eval_runtime": 662.3967, |
|
"eval_samples_per_second": 4.226, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.001, |
|
"step": 9648 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.1576, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5205430510896749, |
|
"eval_f1_macro": 0.7631806100991755, |
|
"eval_f1_micro": 0.8175969609705288, |
|
"eval_loss": 0.13018544018268585, |
|
"eval_roc_auc": 0.8779148602729905, |
|
"eval_runtime": 679.9157, |
|
"eval_samples_per_second": 4.117, |
|
"eval_steps_per_second": 0.257, |
|
"learning_rate": 0.001, |
|
"step": 10184 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.1544, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5319757056091461, |
|
"eval_f1_macro": 0.7721701876863485, |
|
"eval_f1_micro": 0.8229907814143413, |
|
"eval_loss": 0.12343110144138336, |
|
"eval_roc_auc": 0.8780568409783432, |
|
"eval_runtime": 658.9399, |
|
"eval_samples_per_second": 4.248, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.001, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 20.52, |
|
"learning_rate": 0.001, |
|
"loss": 0.1542, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.525902107895677, |
|
"eval_f1_macro": 0.7754587726434372, |
|
"eval_f1_micro": 0.8267645466032855, |
|
"eval_loss": 0.13044790923595428, |
|
"eval_roc_auc": 0.8884477411328827, |
|
"eval_runtime": 659.4337, |
|
"eval_samples_per_second": 4.245, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.001, |
|
"step": 11256 |
|
}, |
|
{ |
|
"epoch": 21.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.1525, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5376920328688818, |
|
"eval_f1_macro": 0.76461964189607, |
|
"eval_f1_micro": 0.8176823176823177, |
|
"eval_loss": 0.12196117639541626, |
|
"eval_roc_auc": 0.8724977198539325, |
|
"eval_runtime": 674.7285, |
|
"eval_samples_per_second": 4.148, |
|
"eval_steps_per_second": 0.259, |
|
"learning_rate": 0.001, |
|
"step": 11792 |
|
}, |
|
{ |
|
"epoch": 22.39, |
|
"learning_rate": 0.001, |
|
"loss": 0.1505, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5255448374419436, |
|
"eval_f1_macro": 0.7872363331907107, |
|
"eval_f1_micro": 0.8264049955396968, |
|
"eval_loss": 0.13109837472438812, |
|
"eval_roc_auc": 0.891814004155335, |
|
"eval_runtime": 658.8901, |
|
"eval_samples_per_second": 4.248, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.001, |
|
"step": 12328 |
|
}, |
|
{ |
|
"epoch": 23.32, |
|
"learning_rate": 0.001, |
|
"loss": 0.1515, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5316184351554126, |
|
"eval_f1_macro": 0.7629977302664256, |
|
"eval_f1_micro": 0.8188446438586652, |
|
"eval_loss": 0.12468679994344711, |
|
"eval_roc_auc": 0.8736890703040258, |
|
"eval_runtime": 649.926, |
|
"eval_samples_per_second": 4.307, |
|
"eval_steps_per_second": 0.269, |
|
"learning_rate": 0.001, |
|
"step": 12864 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.1471, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5226866738120758, |
|
"eval_f1_macro": 0.7726394339166129, |
|
"eval_f1_micro": 0.8258118937157316, |
|
"eval_loss": 0.1264602392911911, |
|
"eval_roc_auc": 0.8875076814616129, |
|
"eval_runtime": 661.9223, |
|
"eval_samples_per_second": 4.229, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.001, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 0.001, |
|
"loss": 0.1475, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5330475169703466, |
|
"eval_f1_macro": 0.7834057788273457, |
|
"eval_f1_micro": 0.8301059554256485, |
|
"eval_loss": 0.12767212092876434, |
|
"eval_roc_auc": 0.8867279830614062, |
|
"eval_runtime": 654.8587, |
|
"eval_samples_per_second": 4.274, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.001, |
|
"step": 13936 |
|
}, |
|
{ |
|
"epoch": 26.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.1484, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5398356555912825, |
|
"eval_f1_macro": 0.7661233818608114, |
|
"eval_f1_micro": 0.8218290555693994, |
|
"eval_loss": 0.12360195070505142, |
|
"eval_roc_auc": 0.8754121833739898, |
|
"eval_runtime": 658.0747, |
|
"eval_samples_per_second": 4.253, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.001, |
|
"step": 14472 |
|
}, |
|
{ |
|
"epoch": 27.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.1475, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"learning_rate": 0.001, |
|
"loss": 0.1472, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5401929260450161, |
|
"eval_f1_macro": 0.7728524137767288, |
|
"eval_f1_micro": 0.8228913409388442, |
|
"eval_loss": 0.1256585270166397, |
|
"eval_roc_auc": 0.8757945197904818, |
|
"eval_runtime": 655.1438, |
|
"eval_samples_per_second": 4.272, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.001, |
|
"step": 15008 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1379, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5451947123972848, |
|
"eval_f1_macro": 0.7891009081675242, |
|
"eval_f1_micro": 0.8352094482376824, |
|
"eval_loss": 0.11994459480047226, |
|
"eval_roc_auc": 0.8865305601939816, |
|
"eval_runtime": 657.6068, |
|
"eval_samples_per_second": 4.256, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.0001, |
|
"step": 15544 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1349, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5487674169346195, |
|
"eval_f1_macro": 0.7943922901606704, |
|
"eval_f1_micro": 0.841315916787615, |
|
"eval_loss": 0.11564121395349503, |
|
"eval_roc_auc": 0.895138845939546, |
|
"eval_runtime": 659.4245, |
|
"eval_samples_per_second": 4.245, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.0001, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 30.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1326, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5498392282958199, |
|
"eval_f1_macro": 0.7983364672353336, |
|
"eval_f1_micro": 0.84037558685446, |
|
"eval_loss": 0.11515345424413681, |
|
"eval_roc_auc": 0.8960665621862288, |
|
"eval_runtime": 652.6109, |
|
"eval_samples_per_second": 4.289, |
|
"eval_steps_per_second": 0.268, |
|
"learning_rate": 0.0001, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 31.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1321, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5509110396570204, |
|
"eval_f1_macro": 0.7911364775051637, |
|
"eval_f1_micro": 0.8385913426265589, |
|
"eval_loss": 0.11371538788080215, |
|
"eval_roc_auc": 0.89024940896342, |
|
"eval_runtime": 663.8067, |
|
"eval_samples_per_second": 4.217, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.0001, |
|
"step": 17152 |
|
}, |
|
{ |
|
"epoch": 32.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1294, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.5512683101107538, |
|
"eval_f1_macro": 0.7924466456431516, |
|
"eval_f1_micro": 0.8406133545115768, |
|
"eval_loss": 0.11363548040390015, |
|
"eval_roc_auc": 0.891620070857568, |
|
"eval_runtime": 641.5792, |
|
"eval_samples_per_second": 4.363, |
|
"eval_steps_per_second": 0.273, |
|
"learning_rate": 0.0001, |
|
"step": 17688 |
|
}, |
|
{ |
|
"epoch": 33.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1297, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.5551982851018221, |
|
"eval_f1_macro": 0.7994952163993189, |
|
"eval_f1_micro": 0.8438879816125325, |
|
"eval_loss": 0.1100151389837265, |
|
"eval_roc_auc": 0.8965325171836247, |
|
"eval_runtime": 666.5011, |
|
"eval_samples_per_second": 4.2, |
|
"eval_steps_per_second": 0.263, |
|
"learning_rate": 0.0001, |
|
"step": 18224 |
|
}, |
|
{ |
|
"epoch": 34.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1296, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.5569846373704894, |
|
"eval_f1_macro": 0.7958547104209327, |
|
"eval_f1_micro": 0.8430563978168587, |
|
"eval_loss": 0.11022897809743881, |
|
"eval_roc_auc": 0.8952561397957243, |
|
"eval_runtime": 651.2057, |
|
"eval_samples_per_second": 4.298, |
|
"eval_steps_per_second": 0.269, |
|
"learning_rate": 0.0001, |
|
"step": 18760 |
|
}, |
|
{ |
|
"epoch": 35.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1276, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.5584137191854234, |
|
"eval_f1_macro": 0.7954026209465306, |
|
"eval_f1_micro": 0.8428501708150318, |
|
"eval_loss": 0.11038191616535187, |
|
"eval_roc_auc": 0.8933002510838177, |
|
"eval_runtime": 659.4359, |
|
"eval_samples_per_second": 4.245, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.0001, |
|
"step": 19296 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1264, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.5644873168988924, |
|
"eval_f1_macro": 0.8073166114826349, |
|
"eval_f1_micro": 0.8467775842392937, |
|
"eval_loss": 0.11108729988336563, |
|
"eval_roc_auc": 0.9003547734565809, |
|
"eval_runtime": 649.1518, |
|
"eval_samples_per_second": 4.312, |
|
"eval_steps_per_second": 0.27, |
|
"learning_rate": 0.0001, |
|
"step": 19832 |
|
}, |
|
{ |
|
"epoch": 37.31, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1279, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5662736691675598, |
|
"eval_f1_macro": 0.8059718169069215, |
|
"eval_f1_micro": 0.8457475869604808, |
|
"eval_loss": 0.11050034314393997, |
|
"eval_roc_auc": 0.8964483289954395, |
|
"eval_runtime": 661.6908, |
|
"eval_samples_per_second": 4.23, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.0001, |
|
"step": 20368 |
|
}, |
|
{ |
|
"epoch": 38.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1231, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5623436941764915, |
|
"eval_f1_macro": 0.810452280144873, |
|
"eval_f1_micro": 0.8481141692150868, |
|
"eval_loss": 0.1114969253540039, |
|
"eval_roc_auc": 0.9016441984475202, |
|
"eval_runtime": 645.0365, |
|
"eval_samples_per_second": 4.339, |
|
"eval_steps_per_second": 0.271, |
|
"learning_rate": 0.0001, |
|
"step": 20904 |
|
}, |
|
{ |
|
"epoch": 39.18, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1276, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.564844587352626, |
|
"eval_f1_macro": 0.7998757908744976, |
|
"eval_f1_micro": 0.8442948914040991, |
|
"eval_loss": 0.10886894911527634, |
|
"eval_roc_auc": 0.8932229387863774, |
|
"eval_runtime": 664.3024, |
|
"eval_samples_per_second": 4.213, |
|
"eval_steps_per_second": 0.263, |
|
"learning_rate": 0.0001, |
|
"step": 21440 |
|
}, |
|
{ |
|
"epoch": 40.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.121, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5627009646302251, |
|
"eval_f1_macro": 0.8014681171854418, |
|
"eval_f1_micro": 0.8454512239678481, |
|
"eval_loss": 0.10981705039739609, |
|
"eval_roc_auc": 0.8953198348828445, |
|
"eval_runtime": 652.9462, |
|
"eval_samples_per_second": 4.287, |
|
"eval_steps_per_second": 0.268, |
|
"learning_rate": 0.0001, |
|
"step": 21976 |
|
}, |
|
{ |
|
"epoch": 41.04, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1241, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 41.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1229, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5619864237227581, |
|
"eval_f1_macro": 0.8009464266100834, |
|
"eval_f1_micro": 0.8459390554813646, |
|
"eval_loss": 0.10872387140989304, |
|
"eval_roc_auc": 0.8965747458488879, |
|
"eval_runtime": 655.0026, |
|
"eval_samples_per_second": 4.273, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.0001, |
|
"step": 22512 |
|
}, |
|
{ |
|
"epoch": 42.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1227, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5687745623436942, |
|
"eval_f1_macro": 0.8067169938735436, |
|
"eval_f1_micro": 0.8468292682926829, |
|
"eval_loss": 0.10882638394832611, |
|
"eval_roc_auc": 0.8956753719148074, |
|
"eval_runtime": 660.8489, |
|
"eval_samples_per_second": 4.235, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.0001, |
|
"step": 23048 |
|
}, |
|
{ |
|
"epoch": 43.84, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1221, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5673454805287603, |
|
"eval_f1_macro": 0.8065744332804647, |
|
"eval_f1_micro": 0.8476346632659257, |
|
"eval_loss": 0.10762665420770645, |
|
"eval_roc_auc": 0.897394560379003, |
|
"eval_runtime": 657.9808, |
|
"eval_samples_per_second": 4.254, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.0001, |
|
"step": 23584 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1191, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5698463737048947, |
|
"eval_f1_macro": 0.817339787108748, |
|
"eval_f1_micro": 0.8507955568898229, |
|
"eval_loss": 0.10689569264650345, |
|
"eval_roc_auc": 0.9027498177813545, |
|
"eval_runtime": 671.4032, |
|
"eval_samples_per_second": 4.169, |
|
"eval_steps_per_second": 0.261, |
|
"learning_rate": 0.0001, |
|
"step": 24120 |
|
}, |
|
{ |
|
"epoch": 45.71, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1212, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.563415505537692, |
|
"eval_f1_macro": 0.8173548628519673, |
|
"eval_f1_micro": 0.8508932256352808, |
|
"eval_loss": 0.10721632838249207, |
|
"eval_roc_auc": 0.9086343215547117, |
|
"eval_runtime": 661.9534, |
|
"eval_samples_per_second": 4.228, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.0001, |
|
"step": 24656 |
|
}, |
|
{ |
|
"epoch": 46.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1198, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5687745623436942, |
|
"eval_f1_macro": 0.8090465345914333, |
|
"eval_f1_micro": 0.849293563579278, |
|
"eval_loss": 0.10663535445928574, |
|
"eval_roc_auc": 0.900130416633457, |
|
"eval_runtime": 673.8512, |
|
"eval_samples_per_second": 4.154, |
|
"eval_steps_per_second": 0.26, |
|
"learning_rate": 0.0001, |
|
"step": 25192 |
|
}, |
|
{ |
|
"epoch": 47.57, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1201, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5652018578063595, |
|
"eval_f1_macro": 0.8082582901487069, |
|
"eval_f1_micro": 0.8484811957569913, |
|
"eval_loss": 0.10762892663478851, |
|
"eval_roc_auc": 0.9002407000277199, |
|
"eval_runtime": 662.4725, |
|
"eval_samples_per_second": 4.225, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.0001, |
|
"step": 25728 |
|
}, |
|
{ |
|
"epoch": 48.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1189, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5687745623436942, |
|
"eval_f1_macro": 0.8152048365492516, |
|
"eval_f1_micro": 0.8507597141312836, |
|
"eval_loss": 0.10654111951589584, |
|
"eval_roc_auc": 0.9026532861685821, |
|
"eval_runtime": 679.062, |
|
"eval_samples_per_second": 4.122, |
|
"eval_steps_per_second": 0.258, |
|
"learning_rate": 0.0001, |
|
"step": 26264 |
|
}, |
|
{ |
|
"epoch": 49.44, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1176, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.563415505537692, |
|
"eval_f1_macro": 0.8033757987633436, |
|
"eval_f1_micro": 0.8462940461725396, |
|
"eval_loss": 0.10730718821287155, |
|
"eval_roc_auc": 0.8965096424048645, |
|
"eval_runtime": 643.0995, |
|
"eval_samples_per_second": 4.352, |
|
"eval_steps_per_second": 0.272, |
|
"learning_rate": 0.0001, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 50.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1202, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.5727045373347625, |
|
"eval_f1_macro": 0.8102272806912906, |
|
"eval_f1_micro": 0.8480676328502416, |
|
"eval_loss": 0.10732194036245346, |
|
"eval_roc_auc": 0.8994002578253693, |
|
"eval_runtime": 645.0782, |
|
"eval_samples_per_second": 4.339, |
|
"eval_steps_per_second": 0.271, |
|
"learning_rate": 0.0001, |
|
"step": 27336 |
|
}, |
|
{ |
|
"epoch": 51.31, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1167, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.571632725973562, |
|
"eval_f1_macro": 0.8179079350328715, |
|
"eval_f1_micro": 0.8521687462863933, |
|
"eval_loss": 0.10600127279758453, |
|
"eval_roc_auc": 0.9068707918898985, |
|
"eval_runtime": 661.3351, |
|
"eval_samples_per_second": 4.232, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.0001, |
|
"step": 27872 |
|
}, |
|
{ |
|
"epoch": 52.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1192, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.5712754555198285, |
|
"eval_f1_macro": 0.8127939311574505, |
|
"eval_f1_micro": 0.8507093268940539, |
|
"eval_loss": 0.10628383606672287, |
|
"eval_roc_auc": 0.9009869922949931, |
|
"eval_runtime": 649.2151, |
|
"eval_samples_per_second": 4.311, |
|
"eval_steps_per_second": 0.27, |
|
"learning_rate": 0.0001, |
|
"step": 28408 |
|
}, |
|
{ |
|
"epoch": 53.17, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1156, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.5719899964272954, |
|
"eval_f1_macro": 0.8113280265904282, |
|
"eval_f1_micro": 0.8493415488703637, |
|
"eval_loss": 0.10670817643404007, |
|
"eval_roc_auc": 0.9000055427080821, |
|
"eval_runtime": 659.1604, |
|
"eval_samples_per_second": 4.246, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.0001, |
|
"step": 28944 |
|
}, |
|
{ |
|
"epoch": 54.1, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1193, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.5727045373347625, |
|
"eval_f1_macro": 0.8116343899877551, |
|
"eval_f1_micro": 0.8490052609300356, |
|
"eval_loss": 0.10690104961395264, |
|
"eval_roc_auc": 0.8994929994035434, |
|
"eval_runtime": 663.4912, |
|
"eval_samples_per_second": 4.219, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.0001, |
|
"step": 29480 |
|
}, |
|
{ |
|
"epoch": 55.04, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1161, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 55.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.116, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.5744908896034298, |
|
"eval_f1_macro": 0.818624257330544, |
|
"eval_f1_micro": 0.8542707589816796, |
|
"eval_loss": 0.10558204352855682, |
|
"eval_roc_auc": 0.9077319963356549, |
|
"eval_runtime": 659.0048, |
|
"eval_samples_per_second": 4.247, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.0001, |
|
"step": 30016 |
|
}, |
|
{ |
|
"epoch": 56.9, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1147, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.5730618077884959, |
|
"eval_f1_macro": 0.8113712664273885, |
|
"eval_f1_micro": 0.8505116959064327, |
|
"eval_loss": 0.10627623647451401, |
|
"eval_roc_auc": 0.8980389880708319, |
|
"eval_runtime": 669.8276, |
|
"eval_samples_per_second": 4.179, |
|
"eval_steps_per_second": 0.261, |
|
"learning_rate": 0.0001, |
|
"step": 30552 |
|
}, |
|
{ |
|
"epoch": 57.84, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1139, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.5705609146123616, |
|
"eval_f1_macro": 0.8074486566040442, |
|
"eval_f1_micro": 0.8488632919066383, |
|
"eval_loss": 0.10657747834920883, |
|
"eval_roc_auc": 0.898615796069772, |
|
"eval_runtime": 654.902, |
|
"eval_samples_per_second": 4.274, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.0001, |
|
"step": 31088 |
|
}, |
|
{ |
|
"epoch": 58.77, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1143, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5727045373347625, |
|
"eval_f1_macro": 0.8065194300593265, |
|
"eval_f1_micro": 0.8490646517579673, |
|
"eval_loss": 0.10738535225391388, |
|
"eval_roc_auc": 0.8971341466029096, |
|
"eval_runtime": 661.8186, |
|
"eval_samples_per_second": 4.229, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.0001, |
|
"step": 31624 |
|
}, |
|
{ |
|
"epoch": 59.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1148, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5694891032511611, |
|
"eval_f1_macro": 0.8079607267977419, |
|
"eval_f1_micro": 0.8498935199269851, |
|
"eval_loss": 0.10777446627616882, |
|
"eval_roc_auc": 0.8980542827803957, |
|
"eval_runtime": 663.1043, |
|
"eval_samples_per_second": 4.221, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.0001, |
|
"step": 32160 |
|
}, |
|
{ |
|
"epoch": 60.63, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1143, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.572347266881029, |
|
"eval_f1_macro": 0.8159017507862669, |
|
"eval_f1_micro": 0.8512052195976559, |
|
"eval_loss": 0.10536229610443115, |
|
"eval_roc_auc": 0.9010383439575261, |
|
"eval_runtime": 660.7796, |
|
"eval_samples_per_second": 4.236, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.0001, |
|
"step": 32696 |
|
}, |
|
{ |
|
"epoch": 61.57, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1133, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.5737763486959628, |
|
"eval_f1_macro": 0.808341771577789, |
|
"eval_f1_micro": 0.8495672315006705, |
|
"eval_loss": 0.10581369698047638, |
|
"eval_roc_auc": 0.8973203339377094, |
|
"eval_runtime": 655.4703, |
|
"eval_samples_per_second": 4.27, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.0001, |
|
"step": 33232 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1134, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.5680600214362272, |
|
"eval_f1_macro": 0.8087952156757714, |
|
"eval_f1_micro": 0.847873368777187, |
|
"eval_loss": 0.1063385158777237, |
|
"eval_roc_auc": 0.899139005299654, |
|
"eval_runtime": 659.92, |
|
"eval_samples_per_second": 4.241, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.0001, |
|
"step": 33768 |
|
}, |
|
{ |
|
"epoch": 63.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1123, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.5702036441586281, |
|
"eval_f1_macro": 0.8121242156828758, |
|
"eval_f1_micro": 0.8503755754785559, |
|
"eval_loss": 0.10543316602706909, |
|
"eval_roc_auc": 0.8997137007632394, |
|
"eval_runtime": 647.0637, |
|
"eval_samples_per_second": 4.326, |
|
"eval_steps_per_second": 0.27, |
|
"learning_rate": 0.0001, |
|
"step": 34304 |
|
}, |
|
{ |
|
"epoch": 64.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1141, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.5730618077884959, |
|
"eval_f1_macro": 0.8099395952334975, |
|
"eval_f1_micro": 0.8494330240737371, |
|
"eval_loss": 0.10500979423522949, |
|
"eval_roc_auc": 0.8988602109578501, |
|
"eval_runtime": 649.7608, |
|
"eval_samples_per_second": 4.308, |
|
"eval_steps_per_second": 0.269, |
|
"learning_rate": 0.0001, |
|
"step": 34840 |
|
}, |
|
{ |
|
"epoch": 65.3, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1104, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.5762772418720972, |
|
"eval_f1_macro": 0.8132610218333434, |
|
"eval_f1_micro": 0.8506980430409072, |
|
"eval_loss": 0.10500740259885788, |
|
"eval_roc_auc": 0.8978957335797464, |
|
"eval_runtime": 652.9454, |
|
"eval_samples_per_second": 4.287, |
|
"eval_steps_per_second": 0.268, |
|
"learning_rate": 0.0001, |
|
"step": 35376 |
|
}, |
|
{ |
|
"epoch": 66.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1124, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.5669882100750268, |
|
"eval_f1_macro": 0.8163481281680508, |
|
"eval_f1_micro": 0.8512857399748246, |
|
"eval_loss": 0.10600199550390244, |
|
"eval_roc_auc": 0.9035619176711028, |
|
"eval_runtime": 657.7451, |
|
"eval_samples_per_second": 4.255, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.0001, |
|
"step": 35912 |
|
}, |
|
{ |
|
"epoch": 67.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1111, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.5680600214362272, |
|
"eval_f1_macro": 0.8156635542444352, |
|
"eval_f1_micro": 0.8511868899867453, |
|
"eval_loss": 0.10536548495292664, |
|
"eval_roc_auc": 0.9018856620506218, |
|
"eval_runtime": 656.6156, |
|
"eval_samples_per_second": 4.263, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.0001, |
|
"step": 36448 |
|
}, |
|
{ |
|
"epoch": 68.1, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1097, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.5673454805287603, |
|
"eval_f1_macro": 0.8109923908741982, |
|
"eval_f1_micro": 0.8500871237156763, |
|
"eval_loss": 0.1056470051407814, |
|
"eval_roc_auc": 0.9021192764421861, |
|
"eval_runtime": 657.5814, |
|
"eval_samples_per_second": 4.257, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.0001, |
|
"step": 36984 |
|
}, |
|
{ |
|
"epoch": 69.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1106, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 69.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1096, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.5673454805287603, |
|
"eval_f1_macro": 0.8119494369118779, |
|
"eval_f1_micro": 0.8500514558992676, |
|
"eval_loss": 0.1059202253818512, |
|
"eval_roc_auc": 0.8997405001477684, |
|
"eval_runtime": 661.9686, |
|
"eval_samples_per_second": 4.228, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.0001, |
|
"step": 37520 |
|
}, |
|
{ |
|
"epoch": 70.9, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1097, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.5737763486959628, |
|
"eval_f1_macro": 0.8171777200998823, |
|
"eval_f1_micro": 0.8517297200071947, |
|
"eval_loss": 0.10546696186065674, |
|
"eval_roc_auc": 0.9037381432590107, |
|
"eval_runtime": 654.6273, |
|
"eval_samples_per_second": 4.276, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.0001, |
|
"step": 38056 |
|
}, |
|
{ |
|
"epoch": 71.83, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1084, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.5630582350839586, |
|
"eval_f1_macro": 0.8063147670569071, |
|
"eval_f1_micro": 0.84692696594993, |
|
"eval_loss": 0.10735420882701874, |
|
"eval_roc_auc": 0.8960515366190057, |
|
"eval_runtime": 658.0374, |
|
"eval_samples_per_second": 4.254, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 1e-05, |
|
"step": 38592 |
|
}, |
|
{ |
|
"epoch": 72.76, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1091, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.5734190782422294, |
|
"eval_f1_macro": 0.8171103344103756, |
|
"eval_f1_micro": 0.8525143029208069, |
|
"eval_loss": 0.10438621789216995, |
|
"eval_roc_auc": 0.9027705800247602, |
|
"eval_runtime": 664.8235, |
|
"eval_samples_per_second": 4.21, |
|
"eval_steps_per_second": 0.263, |
|
"learning_rate": 1e-05, |
|
"step": 39128 |
|
}, |
|
{ |
|
"epoch": 73.69, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1051, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.571632725973562, |
|
"eval_f1_macro": 0.8187356068324974, |
|
"eval_f1_micro": 0.8532934131736527, |
|
"eval_loss": 0.10408420860767365, |
|
"eval_roc_auc": 0.9050291111780234, |
|
"eval_runtime": 673.5967, |
|
"eval_samples_per_second": 4.155, |
|
"eval_steps_per_second": 0.26, |
|
"learning_rate": 1e-05, |
|
"step": 39664 |
|
}, |
|
{ |
|
"epoch": 74.63, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1069, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.5698463737048947, |
|
"eval_f1_macro": 0.8154818599661411, |
|
"eval_f1_micro": 0.8505788712011578, |
|
"eval_loss": 0.10555566847324371, |
|
"eval_roc_auc": 0.9012750442052373, |
|
"eval_runtime": 657.1771, |
|
"eval_samples_per_second": 4.259, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 1e-05, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 75.56, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1079, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.572347266881029, |
|
"eval_f1_macro": 0.8153806167901401, |
|
"eval_f1_micro": 0.8517114840883114, |
|
"eval_loss": 0.10429207235574722, |
|
"eval_roc_auc": 0.9026594578808756, |
|
"eval_runtime": 664.6613, |
|
"eval_samples_per_second": 4.211, |
|
"eval_steps_per_second": 0.263, |
|
"learning_rate": 1e-05, |
|
"step": 40736 |
|
}, |
|
{ |
|
"epoch": 76.49, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1072, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.5780635941407646, |
|
"eval_f1_macro": 0.8187874152985335, |
|
"eval_f1_micro": 0.853607002716571, |
|
"eval_loss": 0.10398340970277786, |
|
"eval_roc_auc": 0.9026235354990767, |
|
"eval_runtime": 664.3977, |
|
"eval_samples_per_second": 4.213, |
|
"eval_steps_per_second": 0.263, |
|
"learning_rate": 1e-05, |
|
"step": 41272 |
|
}, |
|
{ |
|
"epoch": 77.43, |
|
"learning_rate": 1e-05, |
|
"loss": 0.105, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.5712754555198285, |
|
"eval_f1_macro": 0.8150330415154026, |
|
"eval_f1_micro": 0.8514409743156879, |
|
"eval_loss": 0.10431113094091415, |
|
"eval_roc_auc": 0.9018006351128146, |
|
"eval_runtime": 660.4258, |
|
"eval_samples_per_second": 4.238, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 1e-05, |
|
"step": 41808 |
|
}, |
|
{ |
|
"epoch": 78.36, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1061, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.5734190782422294, |
|
"eval_f1_macro": 0.8180685346674, |
|
"eval_f1_micro": 0.8525934861278649, |
|
"eval_loss": 0.10427288711071014, |
|
"eval_roc_auc": 0.9023377306953573, |
|
"eval_runtime": 662.4252, |
|
"eval_samples_per_second": 4.225, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 1e-05, |
|
"step": 42344 |
|
}, |
|
{ |
|
"epoch": 79.29, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1045, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.5719899964272954, |
|
"eval_f1_macro": 0.8176554657248621, |
|
"eval_f1_micro": 0.8512023142288917, |
|
"eval_loss": 0.10507169365882874, |
|
"eval_roc_auc": 0.9017990921847413, |
|
"eval_runtime": 662.5216, |
|
"eval_samples_per_second": 4.225, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 1e-05, |
|
"step": 42880 |
|
}, |
|
{ |
|
"epoch": 80.22, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1062, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.5694891032511611, |
|
"eval_f1_macro": 0.8157488543073257, |
|
"eval_f1_micro": 0.8500577402297453, |
|
"eval_loss": 0.10686225444078445, |
|
"eval_roc_auc": 0.8984886750378719, |
|
"eval_runtime": 655.8239, |
|
"eval_samples_per_second": 4.268, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 1e-05, |
|
"step": 43416 |
|
}, |
|
{ |
|
"epoch": 81.16, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1057, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.5734190782422294, |
|
"eval_f1_macro": 0.8184447896335341, |
|
"eval_f1_micro": 0.8530242056579975, |
|
"eval_loss": 0.1039666160941124, |
|
"eval_roc_auc": 0.9039006170654286, |
|
"eval_runtime": 659.9314, |
|
"eval_samples_per_second": 4.241, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 1e-05, |
|
"step": 43952 |
|
}, |
|
{ |
|
"epoch": 82.09, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1073, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.5712754555198285, |
|
"eval_f1_macro": 0.8151349204991644, |
|
"eval_f1_micro": 0.8503600634688149, |
|
"eval_loss": 0.10477207601070404, |
|
"eval_roc_auc": 0.8973831902752079, |
|
"eval_runtime": 660.9853, |
|
"eval_samples_per_second": 4.235, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 1e-05, |
|
"step": 44488 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1053, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1059, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.5741336191496963, |
|
"eval_f1_macro": 0.8177615611039802, |
|
"eval_f1_micro": 0.8525637163342773, |
|
"eval_loss": 0.1043851226568222, |
|
"eval_roc_auc": 0.9026457060993854, |
|
"eval_runtime": 661.1385, |
|
"eval_samples_per_second": 4.234, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 1e-05, |
|
"step": 45024 |
|
}, |
|
{ |
|
"epoch": 84.89, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1054, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.5702036441586281, |
|
"eval_f1_macro": 0.8148620661290662, |
|
"eval_f1_micro": 0.8505413415593056, |
|
"eval_loss": 0.10626183450222015, |
|
"eval_roc_auc": 0.9002829286929828, |
|
"eval_runtime": 667.6418, |
|
"eval_samples_per_second": 4.192, |
|
"eval_steps_per_second": 0.262, |
|
"learning_rate": 1e-05, |
|
"step": 45560 |
|
}, |
|
{ |
|
"epoch": 85.82, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1046, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.5727045373347625, |
|
"eval_f1_macro": 0.8160800717420196, |
|
"eval_f1_micro": 0.8525306469231703, |
|
"eval_loss": 0.10432148724794388, |
|
"eval_roc_auc": 0.9003534996708482, |
|
"eval_runtime": 656.1196, |
|
"eval_samples_per_second": 4.266, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 1e-05, |
|
"step": 46096 |
|
}, |
|
{ |
|
"epoch": 86.75, |
|
"learning_rate": 1e-05, |
|
"loss": 0.105, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.5719899964272954, |
|
"eval_f1_macro": 0.8185429098530308, |
|
"eval_f1_micro": 0.8532148646214804, |
|
"eval_loss": 0.10474765300750732, |
|
"eval_roc_auc": 0.904161869591144, |
|
"eval_runtime": 662.4652, |
|
"eval_samples_per_second": 4.225, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 1e-05, |
|
"step": 46632 |
|
}, |
|
{ |
|
"epoch": 87.69, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1029, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.5759199714183637, |
|
"eval_f1_macro": 0.8155503181233792, |
|
"eval_f1_micro": 0.8518207954000958, |
|
"eval_loss": 0.10432733595371246, |
|
"eval_roc_auc": 0.9041143079632091, |
|
"eval_runtime": 659.661, |
|
"eval_samples_per_second": 4.243, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 1e-05, |
|
"step": 47168 |
|
}, |
|
{ |
|
"epoch": 88.62, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1059, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.5787781350482315, |
|
"eval_f1_macro": 0.8178092875894164, |
|
"eval_f1_micro": 0.8538892570946557, |
|
"eval_loss": 0.10400809347629547, |
|
"eval_roc_auc": 0.9033957884012571, |
|
"eval_runtime": 659.0133, |
|
"eval_samples_per_second": 4.247, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 47704 |
|
}, |
|
{ |
|
"epoch": 89.55, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1047, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.5694891032511611, |
|
"eval_f1_macro": 0.8136056007290207, |
|
"eval_f1_micro": 0.8504655833485485, |
|
"eval_loss": 0.10469033569097519, |
|
"eval_roc_auc": 0.8982986976684738, |
|
"eval_runtime": 653.5683, |
|
"eval_samples_per_second": 4.283, |
|
"eval_steps_per_second": 0.268, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 48240 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 48240, |
|
"total_flos": 6.13340503159962e+19, |
|
"train_loss": 0.13765035268679188, |
|
"train_runtime": 238196.7751, |
|
"train_samples_per_second": 3.237, |
|
"train_steps_per_second": 0.203 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 48240, |
|
"num_train_epochs": 90, |
|
"save_steps": 500, |
|
"total_flos": 6.13340503159962e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|