|
{ |
|
"best_metric": 0.06187893822789192, |
|
"best_model_checkpoint": "/data/jcanete/all_results/pos/albeto_base/epochs_4_bs_32_lr_5e-5/checkpoint-1350", |
|
"epoch": 4.0, |
|
"global_step": 1792, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.9025092763592619, |
|
"eval_f1": 0.8763735719801217, |
|
"eval_loss": 0.4775541126728058, |
|
"eval_precision": 0.8685320722877382, |
|
"eval_recall": 0.8843579548860163, |
|
"eval_runtime": 2.1198, |
|
"eval_samples_per_second": 780.25, |
|
"eval_steps_per_second": 24.53, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9586079531014384, |
|
"eval_f1": 0.9548124582381745, |
|
"eval_loss": 0.18812087178230286, |
|
"eval_precision": 0.9532058940661091, |
|
"eval_recall": 0.9564244470639948, |
|
"eval_runtime": 2.0628, |
|
"eval_samples_per_second": 801.819, |
|
"eval_steps_per_second": 25.208, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.9670795140712628, |
|
"eval_f1": 0.9638712895984035, |
|
"eval_loss": 0.1361953318119049, |
|
"eval_precision": 0.9626898393590305, |
|
"eval_recall": 0.9650556432438913, |
|
"eval_runtime": 2.0682, |
|
"eval_samples_per_second": 799.741, |
|
"eval_steps_per_second": 25.143, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.9724504837261314, |
|
"eval_f1": 0.9702885315927622, |
|
"eval_loss": 0.114054836332798, |
|
"eval_precision": 0.9693694412316037, |
|
"eval_recall": 0.9712093664462248, |
|
"eval_runtime": 2.0767, |
|
"eval_samples_per_second": 796.445, |
|
"eval_steps_per_second": 25.039, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.9757543925043628, |
|
"eval_f1": 0.9740005591277607, |
|
"eval_loss": 0.09939529001712799, |
|
"eval_precision": 0.9734757010278415, |
|
"eval_recall": 0.9745259834968333, |
|
"eval_runtime": 2.0489, |
|
"eval_samples_per_second": 807.255, |
|
"eval_steps_per_second": 25.379, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9776689652835432, |
|
"eval_f1": 0.9757610820731757, |
|
"eval_loss": 0.088424913585186, |
|
"eval_precision": 0.9743033007310611, |
|
"eval_recall": 0.9772232323030509, |
|
"eval_runtime": 2.0999, |
|
"eval_samples_per_second": 787.646, |
|
"eval_steps_per_second": 24.763, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.9774148184544484, |
|
"eval_f1": 0.9754915162941018, |
|
"eval_loss": 0.08702255040407181, |
|
"eval_precision": 0.9740438247011952, |
|
"eval_recall": 0.9769435176120357, |
|
"eval_runtime": 2.0824, |
|
"eval_samples_per_second": 794.288, |
|
"eval_steps_per_second": 24.972, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.9789905287948357, |
|
"eval_f1": 0.977314445331205, |
|
"eval_loss": 0.07801014184951782, |
|
"eval_precision": 0.9759708302616111, |
|
"eval_recall": 0.9786617649997003, |
|
"eval_runtime": 2.0909, |
|
"eval_samples_per_second": 791.042, |
|
"eval_steps_per_second": 24.87, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9785669507463445, |
|
"eval_f1": 0.9774886245709268, |
|
"eval_loss": 0.08075062185525894, |
|
"eval_precision": 0.9763779527559056, |
|
"eval_recall": 0.9786018261373399, |
|
"eval_runtime": 2.0836, |
|
"eval_samples_per_second": 793.818, |
|
"eval_steps_per_second": 24.957, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.6132812500000005e-05, |
|
"loss": 0.241, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.9803968079158266, |
|
"eval_f1": 0.9791845450735426, |
|
"eval_loss": 0.07434948533773422, |
|
"eval_precision": 0.978091421964396, |
|
"eval_recall": 0.9802801142834309, |
|
"eval_runtime": 2.089, |
|
"eval_samples_per_second": 791.781, |
|
"eval_steps_per_second": 24.893, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_accuracy": 0.9808542722081971, |
|
"eval_f1": 0.9793645725233495, |
|
"eval_loss": 0.0712265595793724, |
|
"eval_precision": 0.9782517691617661, |
|
"eval_recall": 0.9804799104912989, |
|
"eval_runtime": 2.1304, |
|
"eval_samples_per_second": 776.388, |
|
"eval_steps_per_second": 24.409, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.981531997085783, |
|
"eval_f1": 0.9805385337032675, |
|
"eval_loss": 0.07107982784509659, |
|
"eval_precision": 0.9795999760703532, |
|
"eval_recall": 0.9814788915306387, |
|
"eval_runtime": 2.1757, |
|
"eval_samples_per_second": 760.207, |
|
"eval_steps_per_second": 23.9, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 0.9790413581606546, |
|
"eval_f1": 0.9788185948968892, |
|
"eval_loss": 0.07391183823347092, |
|
"eval_precision": 0.9784961265074674, |
|
"eval_recall": 0.9791412758985835, |
|
"eval_runtime": 2.0804, |
|
"eval_samples_per_second": 795.028, |
|
"eval_steps_per_second": 24.995, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.9813286796225072, |
|
"eval_f1": 0.9801365487502994, |
|
"eval_loss": 0.0686594694852829, |
|
"eval_precision": 0.9793349689825065, |
|
"eval_recall": 0.9809394417693952, |
|
"eval_runtime": 2.0741, |
|
"eval_samples_per_second": 797.437, |
|
"eval_steps_per_second": 25.071, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.9802104335744904, |
|
"eval_f1": 0.9797732607501373, |
|
"eval_loss": 0.07021336257457733, |
|
"eval_precision": 0.9796460530520933, |
|
"eval_recall": 0.9799005014884817, |
|
"eval_runtime": 2.1147, |
|
"eval_samples_per_second": 782.145, |
|
"eval_steps_per_second": 24.59, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.9814303383541452, |
|
"eval_f1": 0.9805597435923037, |
|
"eval_loss": 0.0652608796954155, |
|
"eval_precision": 0.9800606762205102, |
|
"eval_recall": 0.981059319494116, |
|
"eval_runtime": 2.0748, |
|
"eval_samples_per_second": 797.185, |
|
"eval_steps_per_second": 25.063, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.981921688890395, |
|
"eval_f1": 0.9809404063385623, |
|
"eval_loss": 0.06410165876150131, |
|
"eval_precision": 0.9798648352305576, |
|
"eval_recall": 0.9820183412918823, |
|
"eval_runtime": 2.0778, |
|
"eval_samples_per_second": 796.04, |
|
"eval_steps_per_second": 25.027, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.9811253621592314, |
|
"eval_f1": 0.9806592046010524, |
|
"eval_loss": 0.06488435715436935, |
|
"eval_precision": 0.9801796407185629, |
|
"eval_recall": 0.9811392379772632, |
|
"eval_runtime": 2.1012, |
|
"eval_samples_per_second": 787.158, |
|
"eval_steps_per_second": 24.747, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.981921688890395, |
|
"eval_f1": 0.980970827259839, |
|
"eval_loss": 0.06455742567777634, |
|
"eval_precision": 0.9803835485222805, |
|
"eval_recall": 0.981558810013786, |
|
"eval_runtime": 2.0619, |
|
"eval_samples_per_second": 802.191, |
|
"eval_steps_per_second": 25.22, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.2181919642857143e-05, |
|
"loss": 0.0552, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_accuracy": 0.9817522576709985, |
|
"eval_f1": 0.9810314877303675, |
|
"eval_loss": 0.06508930027484894, |
|
"eval_precision": 0.9804050683428115, |
|
"eval_recall": 0.9816587081177199, |
|
"eval_runtime": 2.1717, |
|
"eval_samples_per_second": 761.606, |
|
"eval_steps_per_second": 23.944, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_accuracy": 0.9813117365005676, |
|
"eval_f1": 0.9809139731278326, |
|
"eval_loss": 0.06810390949249268, |
|
"eval_precision": 0.9801703672671415, |
|
"eval_recall": 0.9816587081177199, |
|
"eval_runtime": 2.0658, |
|
"eval_samples_per_second": 800.673, |
|
"eval_steps_per_second": 25.172, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_accuracy": 0.9818539164026363, |
|
"eval_f1": 0.9811795481094681, |
|
"eval_loss": 0.06340151280164719, |
|
"eval_precision": 0.9806410409930947, |
|
"eval_recall": 0.9817186469800803, |
|
"eval_runtime": 2.0822, |
|
"eval_samples_per_second": 794.348, |
|
"eval_steps_per_second": 24.973, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.9818369732806967, |
|
"eval_f1": 0.9809949692565679, |
|
"eval_loss": 0.06365738064050674, |
|
"eval_precision": 0.9801926874513793, |
|
"eval_recall": 0.9817985654632275, |
|
"eval_runtime": 2.0674, |
|
"eval_samples_per_second": 800.041, |
|
"eval_steps_per_second": 25.152, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.9816336558174209, |
|
"eval_f1": 0.9807511830834048, |
|
"eval_loss": 0.06771722435951233, |
|
"eval_precision": 0.9801640358404342, |
|
"eval_recall": 0.9813390341851311, |
|
"eval_runtime": 2.1084, |
|
"eval_samples_per_second": 784.48, |
|
"eval_steps_per_second": 24.663, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.9821419494756104, |
|
"eval_f1": 0.9814700185699169, |
|
"eval_loss": 0.06466551870107651, |
|
"eval_precision": 0.9808824409810222, |
|
"eval_recall": 0.9820583005334559, |
|
"eval_runtime": 2.0736, |
|
"eval_samples_per_second": 797.648, |
|
"eval_steps_per_second": 25.077, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_accuracy": 0.9823113806950069, |
|
"eval_f1": 0.9817473789316026, |
|
"eval_loss": 0.06309983134269714, |
|
"eval_precision": 0.9812770713986307, |
|
"eval_recall": 0.9822181374997503, |
|
"eval_runtime": 2.0792, |
|
"eval_samples_per_second": 795.5, |
|
"eval_steps_per_second": 25.01, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.9825994137679809, |
|
"eval_f1": 0.9820124843945068, |
|
"eval_loss": 0.06187893822789192, |
|
"eval_precision": 0.9817869553061469, |
|
"eval_recall": 0.9822381171205371, |
|
"eval_runtime": 2.1014, |
|
"eval_samples_per_second": 787.097, |
|
"eval_steps_per_second": 24.745, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.9825994137679809, |
|
"eval_f1": 0.982114145903031, |
|
"eval_loss": 0.06504332274198532, |
|
"eval_precision": 0.9817905918057663, |
|
"eval_recall": 0.982437913328405, |
|
"eval_runtime": 2.0928, |
|
"eval_samples_per_second": 790.338, |
|
"eval_steps_per_second": 24.847, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_accuracy": 0.9826502431337998, |
|
"eval_f1": 0.9820854386770786, |
|
"eval_loss": 0.06560202687978745, |
|
"eval_precision": 0.9816933181609471, |
|
"eval_recall": 0.9824778725699786, |
|
"eval_runtime": 2.107, |
|
"eval_samples_per_second": 785.001, |
|
"eval_steps_per_second": 24.68, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 8.231026785714286e-06, |
|
"loss": 0.0349, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_accuracy": 0.9823283238169465, |
|
"eval_f1": 0.9818849987017915, |
|
"eval_loss": 0.06487075239419937, |
|
"eval_precision": 0.9815321340866892, |
|
"eval_recall": 0.9822381171205371, |
|
"eval_runtime": 2.0794, |
|
"eval_samples_per_second": 795.421, |
|
"eval_steps_per_second": 25.007, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_accuracy": 0.9819894613781536, |
|
"eval_f1": 0.9817542668251226, |
|
"eval_loss": 0.06555231660604477, |
|
"eval_precision": 0.9814504213090531, |
|
"eval_recall": 0.9820583005334559, |
|
"eval_runtime": 2.0691, |
|
"eval_samples_per_second": 799.369, |
|
"eval_steps_per_second": 25.131, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_accuracy": 0.9817353145490588, |
|
"eval_f1": 0.9813986013986015, |
|
"eval_loss": 0.0676359012722969, |
|
"eval_precision": 0.9814182101540491, |
|
"eval_recall": 0.9813789934267048, |
|
"eval_runtime": 2.0661, |
|
"eval_samples_per_second": 800.526, |
|
"eval_steps_per_second": 25.168, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_accuracy": 0.9817014283051795, |
|
"eval_f1": 0.9812628593116398, |
|
"eval_loss": 0.06634774059057236, |
|
"eval_precision": 0.981066885023267, |
|
"eval_recall": 0.9814589119098519, |
|
"eval_runtime": 2.0684, |
|
"eval_samples_per_second": 799.649, |
|
"eval_steps_per_second": 25.14, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.9819894613781536, |
|
"eval_f1": 0.981464466903687, |
|
"eval_loss": 0.06648925691843033, |
|
"eval_precision": 0.9811705037838702, |
|
"eval_recall": 0.9817586062216539, |
|
"eval_runtime": 2.0825, |
|
"eval_samples_per_second": 794.254, |
|
"eval_steps_per_second": 24.97, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_accuracy": 0.9822605513291879, |
|
"eval_f1": 0.9816685969886978, |
|
"eval_loss": 0.06550350040197372, |
|
"eval_precision": 0.9811396068256661, |
|
"eval_recall": 0.9821981578789635, |
|
"eval_runtime": 2.0814, |
|
"eval_samples_per_second": 794.645, |
|
"eval_steps_per_second": 24.983, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1792, |
|
"total_flos": 211503244790400.0, |
|
"train_loss": 0.09681594052485057, |
|
"train_runtime": 1409.8405, |
|
"train_samples_per_second": 40.586, |
|
"train_steps_per_second": 1.271 |
|
} |
|
], |
|
"max_steps": 1792, |
|
"num_train_epochs": 4, |
|
"total_flos": 211503244790400.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|