|
{ |
|
"best_metric": 0.10334235429763794, |
|
"best_model_checkpoint": "/home1/datahome/mcontini/multilabelTest/huggingface_multilabel/models/dino-large-2023_12_08-with_custom_head-imgsize1036/checkpoint-26264", |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 26800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.01, |
|
"loss": 0.2502, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4030010718113612, |
|
"eval_f1_macro": 0.5121125903883165, |
|
"eval_f1_micro": 0.6426868905742146, |
|
"eval_loss": 0.19902026653289795, |
|
"eval_roc_auc": 0.7509953312991865, |
|
"eval_runtime": 684.3444, |
|
"eval_samples_per_second": 4.09, |
|
"eval_steps_per_second": 0.256, |
|
"learning_rate": 0.01, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.01, |
|
"loss": 0.2164, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4047874240800286, |
|
"eval_f1_macro": 0.6295963337884725, |
|
"eval_f1_micro": 0.6769131832797427, |
|
"eval_loss": 1.1450586318969727, |
|
"eval_roc_auc": 0.7895166431869363, |
|
"eval_runtime": 697.1722, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.251, |
|
"learning_rate": 0.01, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.01, |
|
"loss": 0.2149, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3994283672740264, |
|
"eval_f1_macro": 0.5251308791060187, |
|
"eval_f1_micro": 0.651589690873045, |
|
"eval_loss": 0.2338775098323822, |
|
"eval_roc_auc": 0.7524284144947219, |
|
"eval_runtime": 691.846, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.253, |
|
"learning_rate": 0.01, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.01, |
|
"loss": 0.2159, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4115755627009646, |
|
"eval_f1_macro": 0.6028196862535703, |
|
"eval_f1_micro": 0.7376345424002986, |
|
"eval_loss": 0.16987967491149902, |
|
"eval_roc_auc": 0.8296525341756016, |
|
"eval_runtime": 692.4855, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 0.253, |
|
"learning_rate": 0.01, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.01, |
|
"loss": 0.2171, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.43944265809217575, |
|
"eval_f1_macro": 0.63036934398975, |
|
"eval_f1_micro": 0.7363523166655744, |
|
"eval_loss": 0.16497960686683655, |
|
"eval_roc_auc": 0.8166227130928913, |
|
"eval_runtime": 679.5828, |
|
"eval_samples_per_second": 4.119, |
|
"eval_steps_per_second": 0.258, |
|
"learning_rate": 0.01, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.01, |
|
"loss": 0.2166, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.39228295819935693, |
|
"eval_f1_macro": 0.5122569229120599, |
|
"eval_f1_micro": 0.6852550663871418, |
|
"eval_loss": 0.1748417615890503, |
|
"eval_roc_auc": 0.7773210612716831, |
|
"eval_runtime": 677.7081, |
|
"eval_samples_per_second": 4.13, |
|
"eval_steps_per_second": 0.258, |
|
"learning_rate": 0.01, |
|
"step": 3216 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.01, |
|
"loss": 0.2081, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4347981421936406, |
|
"eval_f1_macro": 0.6129188202062517, |
|
"eval_f1_micro": 0.7454787740338855, |
|
"eval_loss": 0.1636398434638977, |
|
"eval_roc_auc": 0.8290983004904813, |
|
"eval_runtime": 695.4612, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 0.252, |
|
"learning_rate": 0.01, |
|
"step": 3752 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.01, |
|
"loss": 0.2111, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.42765273311897106, |
|
"eval_f1_macro": 0.6444819502221721, |
|
"eval_f1_micro": 0.7542994588093808, |
|
"eval_loss": 0.16511820256710052, |
|
"eval_roc_auc": 0.847560960415324, |
|
"eval_runtime": 698.6301, |
|
"eval_samples_per_second": 4.006, |
|
"eval_steps_per_second": 0.25, |
|
"learning_rate": 0.01, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.01, |
|
"loss": 0.209, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.43586995355484104, |
|
"eval_f1_macro": 0.6522075091299645, |
|
"eval_f1_micro": 0.7062128001070019, |
|
"eval_loss": 0.1750405728816986, |
|
"eval_roc_auc": 0.7966230241843142, |
|
"eval_runtime": 689.5145, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.254, |
|
"learning_rate": 0.01, |
|
"step": 4824 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.01, |
|
"loss": 0.2107, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.3829939264022865, |
|
"eval_f1_macro": 0.5923770104096581, |
|
"eval_f1_micro": 0.7244064088539991, |
|
"eval_loss": 0.17505380511283875, |
|
"eval_roc_auc": 0.8145635827712373, |
|
"eval_runtime": 706.021, |
|
"eval_samples_per_second": 3.964, |
|
"eval_steps_per_second": 0.248, |
|
"learning_rate": 0.01, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 0.01, |
|
"loss": 0.2162, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.42515183994283673, |
|
"eval_f1_macro": 0.678019114323704, |
|
"eval_f1_micro": 0.7505816381315993, |
|
"eval_loss": 0.22285762429237366, |
|
"eval_roc_auc": 0.8475316447819295, |
|
"eval_runtime": 709.1839, |
|
"eval_samples_per_second": 3.947, |
|
"eval_steps_per_second": 0.247, |
|
"learning_rate": 0.01, |
|
"step": 5896 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 0.01, |
|
"loss": 0.2153, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4105037513397642, |
|
"eval_f1_macro": 0.6543045937545326, |
|
"eval_f1_micro": 0.7501448603546181, |
|
"eval_loss": 0.1740036904811859, |
|
"eval_roc_auc": 0.8550110327013883, |
|
"eval_runtime": 710.4504, |
|
"eval_samples_per_second": 3.94, |
|
"eval_steps_per_second": 0.246, |
|
"learning_rate": 0.01, |
|
"step": 6432 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 0.01, |
|
"loss": 0.2197, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.41872097177563417, |
|
"eval_f1_macro": 0.6605308887240204, |
|
"eval_f1_micro": 0.748710897215538, |
|
"eval_loss": 0.1744571030139923, |
|
"eval_roc_auc": 0.857218077620948, |
|
"eval_runtime": 687.247, |
|
"eval_samples_per_second": 4.073, |
|
"eval_steps_per_second": 0.255, |
|
"learning_rate": 0.01, |
|
"step": 6968 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 0.001, |
|
"loss": 0.2128, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.001, |
|
"loss": 0.18, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5058949624866024, |
|
"eval_f1_macro": 0.7455097936778116, |
|
"eval_f1_micro": 0.8036385688295937, |
|
"eval_loss": 0.13480359315872192, |
|
"eval_roc_auc": 0.8730946172404269, |
|
"eval_runtime": 700.8684, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.25, |
|
"learning_rate": 0.001, |
|
"step": 7504 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 0.001, |
|
"loss": 0.164, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5173276170060735, |
|
"eval_f1_macro": 0.7782933504010985, |
|
"eval_f1_micro": 0.8159655831739961, |
|
"eval_loss": 0.13084079325199127, |
|
"eval_roc_auc": 0.8844182661393802, |
|
"eval_runtime": 699.1842, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.25, |
|
"learning_rate": 0.001, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 0.001, |
|
"loss": 0.162, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5201857806359415, |
|
"eval_f1_macro": 0.7530230371766228, |
|
"eval_f1_micro": 0.8187878039748179, |
|
"eval_loss": 0.1304706484079361, |
|
"eval_roc_auc": 0.8764409254464951, |
|
"eval_runtime": 704.2301, |
|
"eval_samples_per_second": 3.975, |
|
"eval_steps_per_second": 0.248, |
|
"learning_rate": 0.001, |
|
"step": 8576 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.1548, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5248302965344765, |
|
"eval_f1_macro": 0.788713547954132, |
|
"eval_f1_micro": 0.829077551987677, |
|
"eval_loss": 0.12419470399618149, |
|
"eval_roc_auc": 0.8945177280625883, |
|
"eval_runtime": 725.0968, |
|
"eval_samples_per_second": 3.86, |
|
"eval_steps_per_second": 0.241, |
|
"learning_rate": 0.001, |
|
"step": 9112 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.1532, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5226866738120758, |
|
"eval_f1_macro": 0.782300591476402, |
|
"eval_f1_micro": 0.8292102759276879, |
|
"eval_loss": 0.12466239929199219, |
|
"eval_roc_auc": 0.8934405856125267, |
|
"eval_runtime": 737.9752, |
|
"eval_samples_per_second": 3.793, |
|
"eval_steps_per_second": 0.237, |
|
"learning_rate": 0.001, |
|
"step": 9648 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.152, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5280457306180779, |
|
"eval_f1_macro": 0.7687831529578533, |
|
"eval_f1_micro": 0.8237944471505114, |
|
"eval_loss": 0.1271921843290329, |
|
"eval_roc_auc": 0.8832304052589448, |
|
"eval_runtime": 742.4217, |
|
"eval_samples_per_second": 3.77, |
|
"eval_steps_per_second": 0.236, |
|
"learning_rate": 0.001, |
|
"step": 10184 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.1479, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5287602715255448, |
|
"eval_f1_macro": 0.7783155153237968, |
|
"eval_f1_micro": 0.8279668813247469, |
|
"eval_loss": 0.12385135143995285, |
|
"eval_roc_auc": 0.8833930136365329, |
|
"eval_runtime": 714.6841, |
|
"eval_samples_per_second": 3.916, |
|
"eval_steps_per_second": 0.245, |
|
"learning_rate": 0.001, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 20.52, |
|
"learning_rate": 0.001, |
|
"loss": 0.1483, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.534119328331547, |
|
"eval_f1_macro": 0.791367242205616, |
|
"eval_f1_micro": 0.8361280949209999, |
|
"eval_loss": 0.1375582218170166, |
|
"eval_roc_auc": 0.8918987306282016, |
|
"eval_runtime": 704.0932, |
|
"eval_samples_per_second": 3.975, |
|
"eval_steps_per_second": 0.249, |
|
"learning_rate": 0.001, |
|
"step": 11256 |
|
}, |
|
{ |
|
"epoch": 21.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.1448, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5380493033226152, |
|
"eval_f1_macro": 0.7773902467356547, |
|
"eval_f1_micro": 0.8291756052712227, |
|
"eval_loss": 0.12673501670360565, |
|
"eval_roc_auc": 0.8842480463699955, |
|
"eval_runtime": 694.3206, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 0.252, |
|
"learning_rate": 0.001, |
|
"step": 11792 |
|
}, |
|
{ |
|
"epoch": 22.39, |
|
"learning_rate": 0.001, |
|
"loss": 0.1456, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5448374419435512, |
|
"eval_f1_macro": 0.7913781838771046, |
|
"eval_f1_micro": 0.8334247577253612, |
|
"eval_loss": 0.12168043851852417, |
|
"eval_roc_auc": 0.8883093845683582, |
|
"eval_runtime": 716.6426, |
|
"eval_samples_per_second": 3.906, |
|
"eval_steps_per_second": 0.244, |
|
"learning_rate": 0.001, |
|
"step": 12328 |
|
}, |
|
{ |
|
"epoch": 23.32, |
|
"learning_rate": 0.001, |
|
"loss": 0.1441, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5380493033226152, |
|
"eval_f1_macro": 0.7852190635254378, |
|
"eval_f1_micro": 0.8282665343094677, |
|
"eval_loss": 0.11930090188980103, |
|
"eval_roc_auc": 0.8801416627925638, |
|
"eval_runtime": 736.3569, |
|
"eval_samples_per_second": 3.801, |
|
"eval_steps_per_second": 0.238, |
|
"learning_rate": 0.001, |
|
"step": 12864 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.1406, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.534119328331547, |
|
"eval_f1_macro": 0.8019722412424143, |
|
"eval_f1_micro": 0.8392282958199356, |
|
"eval_loss": 0.11846613883972168, |
|
"eval_roc_auc": 0.89877394155691, |
|
"eval_runtime": 732.3109, |
|
"eval_samples_per_second": 3.822, |
|
"eval_steps_per_second": 0.239, |
|
"learning_rate": 0.001, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 0.001, |
|
"loss": 0.1416, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5441229010360843, |
|
"eval_f1_macro": 0.7850806686320094, |
|
"eval_f1_micro": 0.8350723045945452, |
|
"eval_loss": 0.1295102834701538, |
|
"eval_roc_auc": 0.888897831813434, |
|
"eval_runtime": 736.4837, |
|
"eval_samples_per_second": 3.8, |
|
"eval_steps_per_second": 0.238, |
|
"learning_rate": 0.001, |
|
"step": 13936 |
|
}, |
|
{ |
|
"epoch": 26.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.1417, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5305466237942122, |
|
"eval_f1_macro": 0.7699298453614014, |
|
"eval_f1_micro": 0.8287484510532839, |
|
"eval_loss": 0.1389976292848587, |
|
"eval_roc_auc": 0.8808089652631531, |
|
"eval_runtime": 714.8246, |
|
"eval_samples_per_second": 3.916, |
|
"eval_steps_per_second": 0.245, |
|
"learning_rate": 0.001, |
|
"step": 14472 |
|
}, |
|
{ |
|
"epoch": 27.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.1452, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"learning_rate": 0.001, |
|
"loss": 0.142, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5441229010360843, |
|
"eval_f1_macro": 0.7857164640115839, |
|
"eval_f1_micro": 0.8328267477203647, |
|
"eval_loss": 0.12560133635997772, |
|
"eval_roc_auc": 0.888767490354217, |
|
"eval_runtime": 711.9043, |
|
"eval_samples_per_second": 3.932, |
|
"eval_steps_per_second": 0.246, |
|
"learning_rate": 0.001, |
|
"step": 15008 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.14, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5359056806002144, |
|
"eval_f1_macro": 0.7758987493304471, |
|
"eval_f1_micro": 0.8291174469664172, |
|
"eval_loss": 0.12682993710041046, |
|
"eval_roc_auc": 0.8815429141058391, |
|
"eval_runtime": 725.5421, |
|
"eval_samples_per_second": 3.858, |
|
"eval_steps_per_second": 0.241, |
|
"learning_rate": 0.001, |
|
"step": 15544 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"learning_rate": 0.001, |
|
"loss": 0.1415, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5419792783136834, |
|
"eval_f1_macro": 0.7674795938272058, |
|
"eval_f1_micro": 0.8239833027638985, |
|
"eval_loss": 0.13736343383789062, |
|
"eval_roc_auc": 0.8722214417609943, |
|
"eval_runtime": 716.5365, |
|
"eval_samples_per_second": 3.906, |
|
"eval_steps_per_second": 0.244, |
|
"learning_rate": 0.001, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 30.78, |
|
"learning_rate": 0.001, |
|
"loss": 0.1414, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5405501964987496, |
|
"eval_f1_macro": 0.7795434508186554, |
|
"eval_f1_micro": 0.8309928553830993, |
|
"eval_loss": 0.12812598049640656, |
|
"eval_roc_auc": 0.8838076169713958, |
|
"eval_runtime": 695.9054, |
|
"eval_samples_per_second": 4.022, |
|
"eval_steps_per_second": 0.251, |
|
"learning_rate": 0.001, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 31.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1349, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5512683101107538, |
|
"eval_f1_macro": 0.7927055189210745, |
|
"eval_f1_micro": 0.8388680866736234, |
|
"eval_loss": 0.11437654495239258, |
|
"eval_roc_auc": 0.8891921900071421, |
|
"eval_runtime": 701.3503, |
|
"eval_samples_per_second": 3.991, |
|
"eval_steps_per_second": 0.25, |
|
"learning_rate": 0.0001, |
|
"step": 17152 |
|
}, |
|
{ |
|
"epoch": 32.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1294, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.5534119328331547, |
|
"eval_f1_macro": 0.7990834574660737, |
|
"eval_f1_micro": 0.8414029503580829, |
|
"eval_loss": 0.10969647765159607, |
|
"eval_roc_auc": 0.8914615216569188, |
|
"eval_runtime": 698.1361, |
|
"eval_samples_per_second": 4.009, |
|
"eval_steps_per_second": 0.251, |
|
"learning_rate": 0.0001, |
|
"step": 17688 |
|
}, |
|
{ |
|
"epoch": 33.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1281, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.5519828510182208, |
|
"eval_f1_macro": 0.7981650632699254, |
|
"eval_f1_micro": 0.8424912902634313, |
|
"eval_loss": 0.11602061241865158, |
|
"eval_roc_auc": 0.8925180364347453, |
|
"eval_runtime": 712.6765, |
|
"eval_samples_per_second": 3.927, |
|
"eval_steps_per_second": 0.246, |
|
"learning_rate": 0.0001, |
|
"step": 18224 |
|
}, |
|
{ |
|
"epoch": 34.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1274, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.5576991782779565, |
|
"eval_f1_macro": 0.7999145597601702, |
|
"eval_f1_micro": 0.8441375939390237, |
|
"eval_loss": 0.12442068755626678, |
|
"eval_roc_auc": 0.893549294756116, |
|
"eval_runtime": 707.8053, |
|
"eval_samples_per_second": 3.954, |
|
"eval_steps_per_second": 0.247, |
|
"learning_rate": 0.0001, |
|
"step": 18760 |
|
}, |
|
{ |
|
"epoch": 35.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1243, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.555912826009289, |
|
"eval_f1_macro": 0.7991433380498747, |
|
"eval_f1_micro": 0.8434315360385399, |
|
"eval_loss": 0.10998840630054474, |
|
"eval_roc_auc": 0.889847418195485, |
|
"eval_runtime": 712.761, |
|
"eval_samples_per_second": 3.927, |
|
"eval_steps_per_second": 0.246, |
|
"learning_rate": 0.0001, |
|
"step": 19296 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1231, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.564130046445159, |
|
"eval_f1_macro": 0.8086403056854424, |
|
"eval_f1_micro": 0.8485288775880857, |
|
"eval_loss": 0.10725793987512589, |
|
"eval_roc_auc": 0.8988640009924486, |
|
"eval_runtime": 714.0011, |
|
"eval_samples_per_second": 3.92, |
|
"eval_steps_per_second": 0.245, |
|
"learning_rate": 0.0001, |
|
"step": 19832 |
|
}, |
|
{ |
|
"epoch": 37.31, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1245, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5602000714540908, |
|
"eval_f1_macro": 0.8053806942991502, |
|
"eval_f1_micro": 0.8455705566857777, |
|
"eval_loss": 0.1091923713684082, |
|
"eval_roc_auc": 0.8915942447938312, |
|
"eval_runtime": 713.3543, |
|
"eval_samples_per_second": 3.924, |
|
"eval_steps_per_second": 0.245, |
|
"learning_rate": 0.0001, |
|
"step": 20368 |
|
}, |
|
{ |
|
"epoch": 38.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1197, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5623436941764915, |
|
"eval_f1_macro": 0.8111886307990975, |
|
"eval_f1_micro": 0.8482584066530071, |
|
"eval_loss": 0.10690516978502274, |
|
"eval_roc_auc": 0.9001525872337657, |
|
"eval_runtime": 742.3723, |
|
"eval_samples_per_second": 3.77, |
|
"eval_steps_per_second": 0.236, |
|
"learning_rate": 0.0001, |
|
"step": 20904 |
|
}, |
|
{ |
|
"epoch": 39.18, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1242, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5637727759914255, |
|
"eval_f1_macro": 0.8080882298039329, |
|
"eval_f1_micro": 0.8468215158924205, |
|
"eval_loss": 0.10654206573963165, |
|
"eval_roc_auc": 0.8949146236875923, |
|
"eval_runtime": 748.0644, |
|
"eval_samples_per_second": 3.742, |
|
"eval_steps_per_second": 0.234, |
|
"learning_rate": 0.0001, |
|
"step": 21440 |
|
}, |
|
{ |
|
"epoch": 40.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1167, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5591282600928903, |
|
"eval_f1_macro": 0.8043452814541342, |
|
"eval_f1_micro": 0.8462199312714777, |
|
"eval_loss": 0.10834041237831116, |
|
"eval_roc_auc": 0.8933701178832314, |
|
"eval_runtime": 743.3565, |
|
"eval_samples_per_second": 3.765, |
|
"eval_steps_per_second": 0.235, |
|
"learning_rate": 0.0001, |
|
"step": 21976 |
|
}, |
|
{ |
|
"epoch": 41.04, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1201, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 41.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1179, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5691318327974276, |
|
"eval_f1_macro": 0.8089823958516517, |
|
"eval_f1_micro": 0.8505242623750304, |
|
"eval_loss": 0.10719550400972366, |
|
"eval_roc_auc": 0.8978175825326844, |
|
"eval_runtime": 698.4214, |
|
"eval_samples_per_second": 4.008, |
|
"eval_steps_per_second": 0.251, |
|
"learning_rate": 0.0001, |
|
"step": 22512 |
|
}, |
|
{ |
|
"epoch": 42.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1186, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5612718828152912, |
|
"eval_f1_macro": 0.8071782588939219, |
|
"eval_f1_micro": 0.8482877132686878, |
|
"eval_loss": 0.11790523678064346, |
|
"eval_roc_auc": 0.8944810701797377, |
|
"eval_runtime": 695.4681, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 0.252, |
|
"learning_rate": 0.0001, |
|
"step": 23048 |
|
}, |
|
{ |
|
"epoch": 43.84, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1174, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5655591282600929, |
|
"eval_f1_macro": 0.808033951861079, |
|
"eval_f1_micro": 0.8477235124701268, |
|
"eval_loss": 0.10680884122848511, |
|
"eval_roc_auc": 0.8946411623084602, |
|
"eval_runtime": 698.3846, |
|
"eval_samples_per_second": 4.008, |
|
"eval_steps_per_second": 0.251, |
|
"learning_rate": 0.0001, |
|
"step": 23584 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1153, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.571632725973562, |
|
"eval_f1_macro": 0.8193083754252073, |
|
"eval_f1_micro": 0.85338323172572, |
|
"eval_loss": 0.10468064993619919, |
|
"eval_roc_auc": 0.9025354227051225, |
|
"eval_runtime": 732.8049, |
|
"eval_samples_per_second": 3.82, |
|
"eval_steps_per_second": 0.239, |
|
"learning_rate": 0.0001, |
|
"step": 24120 |
|
}, |
|
{ |
|
"epoch": 45.71, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1167, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.5666309396212933, |
|
"eval_f1_macro": 0.822920967846307, |
|
"eval_f1_micro": 0.8535311572700296, |
|
"eval_loss": 0.10616234689950943, |
|
"eval_roc_auc": 0.9080353429554627, |
|
"eval_runtime": 730.6388, |
|
"eval_samples_per_second": 3.831, |
|
"eval_steps_per_second": 0.24, |
|
"learning_rate": 0.0001, |
|
"step": 24656 |
|
}, |
|
{ |
|
"epoch": 46.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1162, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5694891032511611, |
|
"eval_f1_macro": 0.8179688260529804, |
|
"eval_f1_micro": 0.8521570528356762, |
|
"eval_loss": 0.10597173124551773, |
|
"eval_roc_auc": 0.9006400086530192, |
|
"eval_runtime": 707.3305, |
|
"eval_samples_per_second": 3.957, |
|
"eval_steps_per_second": 0.247, |
|
"learning_rate": 0.0001, |
|
"step": 25192 |
|
}, |
|
{ |
|
"epoch": 47.57, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1145, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5744908896034298, |
|
"eval_f1_macro": 0.8153624729345684, |
|
"eval_f1_micro": 0.8529072240111792, |
|
"eval_loss": 0.10410725325345993, |
|
"eval_roc_auc": 0.9002018263609444, |
|
"eval_runtime": 693.6803, |
|
"eval_samples_per_second": 4.035, |
|
"eval_steps_per_second": 0.252, |
|
"learning_rate": 0.0001, |
|
"step": 25728 |
|
}, |
|
{ |
|
"epoch": 48.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1143, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5652018578063595, |
|
"eval_f1_macro": 0.8181584192698232, |
|
"eval_f1_micro": 0.8542054951001022, |
|
"eval_loss": 0.10334235429763794, |
|
"eval_roc_auc": 0.9042645729162101, |
|
"eval_runtime": 700.9047, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.25, |
|
"learning_rate": 0.0001, |
|
"step": 26264 |
|
}, |
|
{ |
|
"epoch": 49.44, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1129, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5719899964272954, |
|
"eval_f1_macro": 0.8102121985255263, |
|
"eval_f1_micro": 0.850807689945335, |
|
"eval_loss": 0.10544609278440475, |
|
"eval_roc_auc": 0.8956334123918851, |
|
"eval_runtime": 741.7594, |
|
"eval_samples_per_second": 3.773, |
|
"eval_steps_per_second": 0.236, |
|
"learning_rate": 0.0001, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.0001, |
|
"step": 26800, |
|
"total_flos": 1.1878568451307192e+20, |
|
"train_loss": 0.15577314120620045, |
|
"train_runtime": 143618.003, |
|
"train_samples_per_second": 2.983, |
|
"train_steps_per_second": 0.187 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 26800, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.1878568451307192e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|