zaemyung's picture
Add model files
b9e1533
{
"best_metric": 0.06025264039635658,
"best_model_checkpoint": "roberta-large-finetuned-iterate_intent_classification-200k_all_extra_datasets_v2_single-task_multi-sent/checkpoint-56000",
"epoch": 1.5705631590756113,
"global_step": 56000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.9860612519632043e-05,
"loss": 0.139,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 2.9720383666143147e-05,
"loss": 0.0977,
"step": 1000
},
{
"epoch": 0.03,
"eval_accuracy": 0.971162091635854,
"eval_f1": 0.5399681845297277,
"eval_loss": 0.09991234540939331,
"eval_precision": 0.5387896825396825,
"eval_recall": 0.5411518533280192,
"eval_runtime": 37.2769,
"eval_samples_per_second": 80.479,
"eval_steps_per_second": 5.043,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 2.9580154812654254e-05,
"loss": 0.0907,
"step": 1500
},
{
"epoch": 0.06,
"learning_rate": 2.943992595916536e-05,
"loss": 0.085,
"step": 2000
},
{
"epoch": 0.06,
"eval_accuracy": 0.9737669546615523,
"eval_f1": 0.5691315136476427,
"eval_loss": 0.09658516198396683,
"eval_precision": 0.566936919122009,
"eval_recall": 0.5713431646074133,
"eval_runtime": 38.0964,
"eval_samples_per_second": 78.748,
"eval_steps_per_second": 4.935,
"step": 2000
},
{
"epoch": 0.07,
"learning_rate": 2.9299697105676466e-05,
"loss": 0.085,
"step": 2500
},
{
"epoch": 0.08,
"learning_rate": 2.915946825218757e-05,
"loss": 0.0758,
"step": 3000
},
{
"epoch": 0.08,
"eval_accuracy": 0.9763026620317009,
"eval_f1": 0.6236316332040647,
"eval_loss": 0.07386751472949982,
"eval_precision": 0.6146105466860183,
"eval_recall": 0.6329214826624153,
"eval_runtime": 37.6732,
"eval_samples_per_second": 79.632,
"eval_steps_per_second": 4.99,
"step": 3000
},
{
"epoch": 0.1,
"learning_rate": 2.9019519856405656e-05,
"loss": 0.0781,
"step": 3500
},
{
"epoch": 0.11,
"learning_rate": 2.887929100291676e-05,
"loss": 0.0782,
"step": 4000
},
{
"epoch": 0.11,
"eval_accuracy": 0.9755096771813999,
"eval_f1": 0.5991719984945427,
"eval_loss": 0.07741887867450714,
"eval_precision": 0.5675579322638146,
"eval_recall": 0.6345157433240335,
"eval_runtime": 36.9968,
"eval_samples_per_second": 81.088,
"eval_steps_per_second": 5.082,
"step": 4000
},
{
"epoch": 0.13,
"learning_rate": 2.8739342607134845e-05,
"loss": 0.0778,
"step": 4500
},
{
"epoch": 0.14,
"learning_rate": 2.859911375364595e-05,
"loss": 0.0695,
"step": 5000
},
{
"epoch": 0.14,
"eval_accuracy": 0.9774137628975298,
"eval_f1": 0.680416421135337,
"eval_loss": 0.07352685928344727,
"eval_precision": 0.6707978311386522,
"eval_recall": 0.6903148664806696,
"eval_runtime": 36.5263,
"eval_samples_per_second": 82.133,
"eval_steps_per_second": 5.147,
"step": 5000
},
{
"epoch": 0.15,
"learning_rate": 2.8458884900157057e-05,
"loss": 0.0699,
"step": 5500
},
{
"epoch": 0.17,
"learning_rate": 2.8318656046668165e-05,
"loss": 0.0722,
"step": 6000
},
{
"epoch": 0.17,
"eval_accuracy": 0.9802675862832062,
"eval_f1": 0.7195344478966317,
"eval_loss": 0.06846912950277328,
"eval_precision": 0.7123327800019529,
"eval_recall": 0.7268832204065365,
"eval_runtime": 37.6415,
"eval_samples_per_second": 79.699,
"eval_steps_per_second": 4.994,
"step": 6000
},
{
"epoch": 0.18,
"learning_rate": 2.817842719317927e-05,
"loss": 0.0702,
"step": 6500
},
{
"epoch": 0.2,
"learning_rate": 2.8038198339690376e-05,
"loss": 0.0667,
"step": 7000
},
{
"epoch": 0.2,
"eval_accuracy": 0.9801108334639607,
"eval_f1": 0.7278459189202502,
"eval_loss": 0.06565115600824356,
"eval_precision": 0.7197272284461763,
"eval_recall": 0.7361498605021921,
"eval_runtime": 37.325,
"eval_samples_per_second": 80.375,
"eval_steps_per_second": 5.037,
"step": 7000
},
{
"epoch": 0.21,
"learning_rate": 2.789796948620148e-05,
"loss": 0.0658,
"step": 7500
},
{
"epoch": 0.22,
"learning_rate": 2.7757740632712588e-05,
"loss": 0.0688,
"step": 8000
},
{
"epoch": 0.22,
"eval_accuracy": 0.9796728476454805,
"eval_f1": 0.7163943328232216,
"eval_loss": 0.06845768541097641,
"eval_precision": 0.7099109676156932,
"eval_recall": 0.7229972100438422,
"eval_runtime": 36.6425,
"eval_samples_per_second": 81.872,
"eval_steps_per_second": 5.131,
"step": 8000
},
{
"epoch": 0.24,
"learning_rate": 2.7617511779223692e-05,
"loss": 0.0678,
"step": 8500
},
{
"epoch": 0.25,
"learning_rate": 2.7477563383441778e-05,
"loss": 0.0654,
"step": 9000
},
{
"epoch": 0.25,
"eval_accuracy": 0.9798526523499091,
"eval_f1": 0.7101066431855984,
"eval_loss": 0.06661399453878403,
"eval_precision": 0.6974822217951182,
"eval_recall": 0.7231964926265444,
"eval_runtime": 41.9581,
"eval_samples_per_second": 71.5,
"eval_steps_per_second": 4.481,
"step": 9000
},
{
"epoch": 0.27,
"learning_rate": 2.7337334529952882e-05,
"loss": 0.0691,
"step": 9500
},
{
"epoch": 0.28,
"learning_rate": 2.7197386134170967e-05,
"loss": 0.0677,
"step": 10000
},
{
"epoch": 0.28,
"eval_accuracy": 0.980018625923228,
"eval_f1": 0.7221836228287842,
"eval_loss": 0.07092269510030746,
"eval_precision": 0.7193988530749457,
"eval_recall": 0.7249900358708649,
"eval_runtime": 37.1023,
"eval_samples_per_second": 80.858,
"eval_steps_per_second": 5.067,
"step": 10000
},
{
"epoch": 0.29,
"learning_rate": 2.7057157280682075e-05,
"loss": 0.0686,
"step": 10500
},
{
"epoch": 0.31,
"learning_rate": 2.691692842719318e-05,
"loss": 0.0666,
"step": 11000
},
{
"epoch": 0.31,
"eval_accuracy": 0.9806548579542835,
"eval_f1": 0.7343549184379634,
"eval_loss": 0.0653684139251709,
"eval_precision": 0.7286639199529135,
"eval_recall": 0.7401355121562375,
"eval_runtime": 35.9849,
"eval_samples_per_second": 83.368,
"eval_steps_per_second": 5.224,
"step": 11000
},
{
"epoch": 0.32,
"learning_rate": 2.6776699573704287e-05,
"loss": 0.0667,
"step": 11500
},
{
"epoch": 0.34,
"learning_rate": 2.663647072021539e-05,
"loss": 0.0635,
"step": 12000
},
{
"epoch": 0.34,
"eval_accuracy": 0.9803413523157923,
"eval_f1": 0.7172954434100932,
"eval_loss": 0.06524238735437393,
"eval_precision": 0.7056101792943898,
"eval_recall": 0.7293742526903149,
"eval_runtime": 37.7011,
"eval_samples_per_second": 79.573,
"eval_steps_per_second": 4.987,
"step": 12000
},
{
"epoch": 0.35,
"learning_rate": 2.6496241866726498e-05,
"loss": 0.066,
"step": 12500
},
{
"epoch": 0.36,
"learning_rate": 2.6356013013237602e-05,
"loss": 0.0635,
"step": 13000
},
{
"epoch": 0.36,
"eval_accuracy": 0.9800601193165577,
"eval_f1": 0.7266683205160009,
"eval_loss": 0.07080024480819702,
"eval_precision": 0.7236881114734658,
"eval_recall": 0.7296731765643683,
"eval_runtime": 37.3623,
"eval_samples_per_second": 80.295,
"eval_steps_per_second": 5.032,
"step": 13000
},
{
"epoch": 0.38,
"learning_rate": 2.621578415974871e-05,
"loss": 0.0614,
"step": 13500
},
{
"epoch": 0.39,
"learning_rate": 2.6075555306259814e-05,
"loss": 0.0619,
"step": 14000
},
{
"epoch": 0.39,
"eval_accuracy": 0.979778886317323,
"eval_f1": 0.7210257933561068,
"eval_loss": 0.06537187844514847,
"eval_precision": 0.7165207123880744,
"eval_recall": 0.7255878836189718,
"eval_runtime": 37.6468,
"eval_samples_per_second": 79.688,
"eval_steps_per_second": 4.994,
"step": 14000
},
{
"epoch": 0.41,
"learning_rate": 2.59356069104779e-05,
"loss": 0.0623,
"step": 14500
},
{
"epoch": 0.42,
"learning_rate": 2.5795378056989004e-05,
"loss": 0.0621,
"step": 15000
},
{
"epoch": 0.42,
"eval_accuracy": 0.9795529778425279,
"eval_f1": 0.7259988073941562,
"eval_loss": 0.07811370491981506,
"eval_precision": 0.7241276764472641,
"eval_recall": 0.7278796333200478,
"eval_runtime": 37.6797,
"eval_samples_per_second": 79.618,
"eval_steps_per_second": 4.989,
"step": 15000
},
{
"epoch": 0.43,
"learning_rate": 2.565542966120709e-05,
"loss": 0.0663,
"step": 15500
},
{
"epoch": 0.45,
"learning_rate": 2.5515200807718197e-05,
"loss": 0.0635,
"step": 16000
},
{
"epoch": 0.45,
"eval_accuracy": 0.9805442089054043,
"eval_f1": 0.7281001137656427,
"eval_loss": 0.06664826720952988,
"eval_precision": 0.7229152342598959,
"eval_recall": 0.7333599043443603,
"eval_runtime": 37.7966,
"eval_samples_per_second": 79.372,
"eval_steps_per_second": 4.974,
"step": 16000
},
{
"epoch": 0.46,
"learning_rate": 2.53749719542293e-05,
"loss": 0.0619,
"step": 16500
},
{
"epoch": 0.48,
"learning_rate": 2.5235023558447387e-05,
"loss": 0.0592,
"step": 17000
},
{
"epoch": 0.48,
"eval_accuracy": 0.979691289153627,
"eval_f1": 0.7246881057706644,
"eval_loss": 0.0674288421869278,
"eval_precision": 0.7229991074085094,
"eval_recall": 0.7263850139497808,
"eval_runtime": 36.2962,
"eval_samples_per_second": 82.653,
"eval_steps_per_second": 5.18,
"step": 17000
},
{
"epoch": 0.49,
"learning_rate": 2.509479470495849e-05,
"loss": 0.0616,
"step": 17500
},
{
"epoch": 0.5,
"learning_rate": 2.49545658514696e-05,
"loss": 0.062,
"step": 18000
},
{
"epoch": 0.5,
"eval_accuracy": 0.9812219343297894,
"eval_f1": 0.7340672360171793,
"eval_loss": 0.06332722306251526,
"eval_precision": 0.7274239311221994,
"eval_recall": 0.7408330011956955,
"eval_runtime": 38.753,
"eval_samples_per_second": 77.413,
"eval_steps_per_second": 4.851,
"step": 18000
},
{
"epoch": 0.52,
"learning_rate": 2.4814336997980706e-05,
"loss": 0.063,
"step": 18500
},
{
"epoch": 0.53,
"learning_rate": 2.4674108144491813e-05,
"loss": 0.0607,
"step": 19000
},
{
"epoch": 0.53,
"eval_accuracy": 0.9811988824446063,
"eval_f1": 0.7373217115689383,
"eval_loss": 0.06261081993579865,
"eval_precision": 0.732965734541158,
"eval_recall": 0.7417297728178557,
"eval_runtime": 36.3964,
"eval_samples_per_second": 82.426,
"eval_steps_per_second": 5.165,
"step": 19000
},
{
"epoch": 0.55,
"learning_rate": 2.4533879291002917e-05,
"loss": 0.0634,
"step": 19500
},
{
"epoch": 0.56,
"learning_rate": 2.4393650437514025e-05,
"loss": 0.0629,
"step": 20000
},
{
"epoch": 0.56,
"eval_accuracy": 0.9799218080054587,
"eval_f1": 0.7318235995232418,
"eval_loss": 0.07822923362255096,
"eval_precision": 0.7295049504950495,
"eval_recall": 0.7341570346751694,
"eval_runtime": 37.4868,
"eval_samples_per_second": 80.028,
"eval_steps_per_second": 5.015,
"step": 20000
},
{
"epoch": 0.57,
"learning_rate": 2.425342158402513e-05,
"loss": 0.0613,
"step": 20500
},
{
"epoch": 0.59,
"learning_rate": 2.4113192730536237e-05,
"loss": 0.0624,
"step": 21000
},
{
"epoch": 0.59,
"eval_accuracy": 0.9803644042009756,
"eval_f1": 0.7339413164155432,
"eval_loss": 0.06663613021373749,
"eval_precision": 0.7301775147928994,
"eval_recall": 0.7377441211638103,
"eval_runtime": 35.995,
"eval_samples_per_second": 83.345,
"eval_steps_per_second": 5.223,
"step": 21000
},
{
"epoch": 0.6,
"learning_rate": 2.3972963877047344e-05,
"loss": 0.0608,
"step": 21500
},
{
"epoch": 0.62,
"learning_rate": 2.3833015481265426e-05,
"loss": 0.0624,
"step": 22000
},
{
"epoch": 0.62,
"eval_accuracy": 0.9802122617587666,
"eval_f1": 0.7311229308545011,
"eval_loss": 0.06742220371961594,
"eval_precision": 0.7294911219125086,
"eval_recall": 0.7327620565962535,
"eval_runtime": 37.1663,
"eval_samples_per_second": 80.718,
"eval_steps_per_second": 5.058,
"step": 22000
},
{
"epoch": 0.63,
"learning_rate": 2.3692786627776534e-05,
"loss": 0.0593,
"step": 22500
},
{
"epoch": 0.65,
"learning_rate": 2.3552557774287638e-05,
"loss": 0.0593,
"step": 23000
},
{
"epoch": 0.65,
"eval_accuracy": 0.9812634277231191,
"eval_f1": 0.7430700447093891,
"eval_loss": 0.06372907757759094,
"eval_precision": 0.7409352090350704,
"eval_recall": 0.7452172180151455,
"eval_runtime": 37.7419,
"eval_samples_per_second": 79.487,
"eval_steps_per_second": 4.981,
"step": 23000
},
{
"epoch": 0.66,
"learning_rate": 2.3412328920798745e-05,
"loss": 0.0629,
"step": 23500
},
{
"epoch": 0.67,
"learning_rate": 2.327210006730985e-05,
"loss": 0.0605,
"step": 24000
},
{
"epoch": 0.67,
"eval_accuracy": 0.9806087541839171,
"eval_f1": 0.7302190657197158,
"eval_loss": 0.06761059165000916,
"eval_precision": 0.7330789315123519,
"eval_recall": 0.7273814268632921,
"eval_runtime": 37.6798,
"eval_samples_per_second": 79.618,
"eval_steps_per_second": 4.989,
"step": 24000
},
{
"epoch": 0.69,
"learning_rate": 2.3131871213820957e-05,
"loss": 0.0608,
"step": 24500
},
{
"epoch": 0.7,
"learning_rate": 2.299164236033206e-05,
"loss": 0.0577,
"step": 25000
},
{
"epoch": 0.7,
"eval_accuracy": 0.9804105079713419,
"eval_f1": 0.7315063052328468,
"eval_loss": 0.07262897491455078,
"eval_precision": 0.7289728873936275,
"eval_recall": 0.7340573933838183,
"eval_runtime": 36.2251,
"eval_samples_per_second": 82.815,
"eval_steps_per_second": 5.19,
"step": 25000
},
{
"epoch": 0.72,
"learning_rate": 2.285141350684317e-05,
"loss": 0.065,
"step": 25500
},
{
"epoch": 0.73,
"learning_rate": 2.2711184653354273e-05,
"loss": 0.0616,
"step": 26000
},
{
"epoch": 0.73,
"eval_accuracy": 0.981291089985339,
"eval_f1": 0.7420074349442379,
"eval_loss": 0.06614366918802261,
"eval_precision": 0.738238485057698,
"eval_recall": 0.7458150657632523,
"eval_runtime": 37.1932,
"eval_samples_per_second": 80.66,
"eval_steps_per_second": 5.055,
"step": 26000
},
{
"epoch": 0.74,
"learning_rate": 2.257095579986538e-05,
"loss": 0.0587,
"step": 26500
},
{
"epoch": 0.76,
"learning_rate": 2.2431007404083466e-05,
"loss": 0.0598,
"step": 27000
},
{
"epoch": 0.76,
"eval_accuracy": 0.9814386220505112,
"eval_f1": 0.7464872353057589,
"eval_loss": 0.06462829560041428,
"eval_precision": 0.7413522012578616,
"eval_recall": 0.7516939019529693,
"eval_runtime": 37.4813,
"eval_samples_per_second": 80.04,
"eval_steps_per_second": 5.016,
"step": 27000
},
{
"epoch": 0.77,
"learning_rate": 2.229077855059457e-05,
"loss": 0.0536,
"step": 27500
},
{
"epoch": 0.79,
"learning_rate": 2.2150549697105678e-05,
"loss": 0.0596,
"step": 28000
},
{
"epoch": 0.79,
"eval_accuracy": 0.9802906381683895,
"eval_f1": 0.735488974587165,
"eval_loss": 0.06780258566141129,
"eval_precision": 0.7299313052011777,
"eval_recall": 0.7411319250697489,
"eval_runtime": 37.4961,
"eval_samples_per_second": 80.008,
"eval_steps_per_second": 5.014,
"step": 28000
},
{
"epoch": 0.8,
"learning_rate": 2.201060130132376e-05,
"loss": 0.0602,
"step": 28500
},
{
"epoch": 0.81,
"learning_rate": 2.1870372447834867e-05,
"loss": 0.0593,
"step": 29000
},
{
"epoch": 0.81,
"eval_accuracy": 0.9815723229845736,
"eval_f1": 0.7491701758731731,
"eval_loss": 0.06298363208770752,
"eval_precision": 0.7449995073406247,
"eval_recall": 0.7533878039059386,
"eval_runtime": 37.7907,
"eval_samples_per_second": 79.385,
"eval_steps_per_second": 4.975,
"step": 29000
},
{
"epoch": 0.83,
"learning_rate": 2.173014359434597e-05,
"loss": 0.0586,
"step": 29500
},
{
"epoch": 0.84,
"learning_rate": 2.158991474085708e-05,
"loss": 0.0619,
"step": 30000
},
{
"epoch": 0.84,
"eval_accuracy": 0.9810697918875806,
"eval_f1": 0.7425102020503633,
"eval_loss": 0.06618673354387283,
"eval_precision": 0.7416981507257904,
"eval_recall": 0.7433240334794738,
"eval_runtime": 37.549,
"eval_samples_per_second": 79.896,
"eval_steps_per_second": 5.007,
"step": 30000
},
{
"epoch": 0.86,
"learning_rate": 2.1449685887368183e-05,
"loss": 0.0595,
"step": 30500
},
{
"epoch": 0.87,
"learning_rate": 2.130945703387929e-05,
"loss": 0.0589,
"step": 31000
},
{
"epoch": 0.87,
"eval_accuracy": 0.9798572627269458,
"eval_f1": 0.7328214215396731,
"eval_loss": 0.06754804402589798,
"eval_precision": 0.724124513618677,
"eval_recall": 0.7417297728178557,
"eval_runtime": 37.8499,
"eval_samples_per_second": 79.261,
"eval_steps_per_second": 4.967,
"step": 31000
},
{
"epoch": 0.88,
"learning_rate": 2.1169228180390395e-05,
"loss": 0.0593,
"step": 31500
},
{
"epoch": 0.9,
"learning_rate": 2.1028999326901502e-05,
"loss": 0.0616,
"step": 32000
},
{
"epoch": 0.9,
"eval_accuracy": 0.9811481682972033,
"eval_f1": 0.752437998118905,
"eval_loss": 0.06686053425073624,
"eval_precision": 0.7476635514018691,
"eval_recall": 0.7572738142686329,
"eval_runtime": 37.608,
"eval_samples_per_second": 79.77,
"eval_steps_per_second": 4.999,
"step": 32000
},
{
"epoch": 0.91,
"learning_rate": 2.088877047341261e-05,
"loss": 0.0596,
"step": 32500
},
{
"epoch": 0.93,
"learning_rate": 2.0748541619923717e-05,
"loss": 0.0599,
"step": 33000
},
{
"epoch": 0.93,
"eval_accuracy": 0.9808623249209321,
"eval_f1": 0.7464684014869889,
"eval_loss": 0.0668148472905159,
"eval_precision": 0.7426767925830949,
"eval_recall": 0.7502989238740534,
"eval_runtime": 37.7142,
"eval_samples_per_second": 79.546,
"eval_steps_per_second": 4.985,
"step": 33000
},
{
"epoch": 0.94,
"learning_rate": 2.06085932241418e-05,
"loss": 0.0611,
"step": 33500
},
{
"epoch": 0.95,
"learning_rate": 2.0468364370652904e-05,
"loss": 0.0578,
"step": 34000
},
{
"epoch": 0.95,
"eval_accuracy": 0.9800970023328508,
"eval_f1": 0.7286303792493615,
"eval_loss": 0.07239069789648056,
"eval_precision": 0.7186046511627907,
"eval_recall": 0.738939816660024,
"eval_runtime": 36.2678,
"eval_samples_per_second": 82.718,
"eval_steps_per_second": 5.184,
"step": 34000
},
{
"epoch": 0.97,
"learning_rate": 2.032841597487099e-05,
"loss": 0.0591,
"step": 34500
},
{
"epoch": 0.98,
"learning_rate": 2.0188187121382093e-05,
"loss": 0.0552,
"step": 35000
},
{
"epoch": 0.98,
"eval_accuracy": 0.9813971286571815,
"eval_f1": 0.7495317890586495,
"eval_loss": 0.06262348592281342,
"eval_precision": 0.7415642676028866,
"eval_recall": 0.7576723794340374,
"eval_runtime": 37.8404,
"eval_samples_per_second": 79.28,
"eval_steps_per_second": 4.968,
"step": 35000
},
{
"epoch": 1.0,
"learning_rate": 2.0047958267893204e-05,
"loss": 0.0552,
"step": 35500
},
{
"epoch": 1.01,
"learning_rate": 1.990772941440431e-05,
"loss": 0.0533,
"step": 36000
},
{
"epoch": 1.01,
"eval_accuracy": 0.981254206969046,
"eval_f1": 0.7463703703703705,
"eval_loss": 0.06460578739643097,
"eval_precision": 0.7398668494223615,
"eval_recall": 0.752989238740534,
"eval_runtime": 37.9225,
"eval_samples_per_second": 79.109,
"eval_steps_per_second": 4.957,
"step": 36000
},
{
"epoch": 1.02,
"learning_rate": 1.9767500560915416e-05,
"loss": 0.0502,
"step": 36500
},
{
"epoch": 1.04,
"learning_rate": 1.962727170742652e-05,
"loss": 0.0495,
"step": 37000
},
{
"epoch": 1.04,
"eval_accuracy": 0.9808484937898221,
"eval_f1": 0.7483538789048964,
"eval_loss": 0.0672934502363205,
"eval_precision": 0.7436780478205255,
"eval_recall": 0.7530888800318852,
"eval_runtime": 37.432,
"eval_samples_per_second": 80.145,
"eval_steps_per_second": 5.022,
"step": 37000
},
{
"epoch": 1.05,
"learning_rate": 1.9487042853937628e-05,
"loss": 0.051,
"step": 37500
},
{
"epoch": 1.07,
"learning_rate": 1.9346814000448735e-05,
"loss": 0.0545,
"step": 38000
},
{
"epoch": 1.07,
"eval_accuracy": 0.9808992079372251,
"eval_f1": 0.7423830921929782,
"eval_loss": 0.06774791330099106,
"eval_precision": 0.7358786098874205,
"eval_recall": 0.7490035870864886,
"eval_runtime": 36.4586,
"eval_samples_per_second": 82.285,
"eval_steps_per_second": 5.157,
"step": 38000
},
{
"epoch": 1.08,
"learning_rate": 1.9206865604666817e-05,
"loss": 0.0517,
"step": 38500
},
{
"epoch": 1.09,
"learning_rate": 1.9066636751177925e-05,
"loss": 0.0506,
"step": 39000
},
{
"epoch": 1.09,
"eval_accuracy": 0.9809821947238845,
"eval_f1": 0.7475723369645586,
"eval_loss": 0.0674295574426651,
"eval_precision": 0.7397327090039996,
"eval_recall": 0.7555799123156636,
"eval_runtime": 37.4832,
"eval_samples_per_second": 80.036,
"eval_steps_per_second": 5.016,
"step": 39000
},
{
"epoch": 1.11,
"learning_rate": 1.892640789768903e-05,
"loss": 0.0513,
"step": 39500
},
{
"epoch": 1.12,
"learning_rate": 1.8786179044200137e-05,
"loss": 0.0505,
"step": 40000
},
{
"epoch": 1.12,
"eval_accuracy": 0.9814017390342182,
"eval_f1": 0.7522636188214338,
"eval_loss": 0.06595376133918762,
"eval_precision": 0.7471253071253071,
"eval_recall": 0.7574730968513352,
"eval_runtime": 37.3722,
"eval_samples_per_second": 80.274,
"eval_steps_per_second": 5.03,
"step": 40000
},
{
"epoch": 1.14,
"learning_rate": 1.864595019071124e-05,
"loss": 0.0497,
"step": 40500
},
{
"epoch": 1.15,
"learning_rate": 1.8505721337222348e-05,
"loss": 0.0563,
"step": 41000
},
{
"epoch": 1.15,
"eval_accuracy": 0.9808945975601885,
"eval_f1": 0.7516394655095903,
"eval_loss": 0.06491059809923172,
"eval_precision": 0.7439726695949244,
"eval_recall": 0.7594659226783579,
"eval_runtime": 38.0002,
"eval_samples_per_second": 78.947,
"eval_steps_per_second": 4.947,
"step": 41000
},
{
"epoch": 1.16,
"learning_rate": 1.836577294144043e-05,
"loss": 0.0523,
"step": 41500
},
{
"epoch": 1.18,
"learning_rate": 1.8225544087951538e-05,
"loss": 0.0502,
"step": 42000
},
{
"epoch": 1.18,
"eval_accuracy": 0.9805949230528073,
"eval_f1": 0.7549393414211438,
"eval_loss": 0.07190626114606857,
"eval_precision": 0.7503691308199626,
"eval_recall": 0.7595655639697091,
"eval_runtime": 37.6782,
"eval_samples_per_second": 79.622,
"eval_steps_per_second": 4.99,
"step": 42000
},
{
"epoch": 1.19,
"learning_rate": 1.8085315234462645e-05,
"loss": 0.051,
"step": 42500
},
{
"epoch": 1.21,
"learning_rate": 1.794508638097375e-05,
"loss": 0.0491,
"step": 43000
},
{
"epoch": 1.21,
"eval_accuracy": 0.9808254419046389,
"eval_f1": 0.7479091995221027,
"eval_loss": 0.06869912892580032,
"eval_precision": 0.7473139673696777,
"eval_recall": 0.7485053806297329,
"eval_runtime": 37.2482,
"eval_samples_per_second": 80.541,
"eval_steps_per_second": 5.047,
"step": 43000
},
{
"epoch": 1.22,
"learning_rate": 1.7805137985191835e-05,
"loss": 0.0497,
"step": 43500
},
{
"epoch": 1.23,
"learning_rate": 1.766490913170294e-05,
"loss": 0.051,
"step": 44000
},
{
"epoch": 1.23,
"eval_accuracy": 0.9811850513134964,
"eval_f1": 0.7476012925677354,
"eval_loss": 0.0647246241569519,
"eval_precision": 0.7460065482686774,
"eval_recall": 0.7492028696691909,
"eval_runtime": 37.6178,
"eval_samples_per_second": 79.749,
"eval_steps_per_second": 4.998,
"step": 44000
},
{
"epoch": 1.25,
"learning_rate": 1.7524960735921025e-05,
"loss": 0.0517,
"step": 44500
},
{
"epoch": 1.26,
"learning_rate": 1.738473188243213e-05,
"loss": 0.0495,
"step": 45000
},
{
"epoch": 1.26,
"eval_accuracy": 0.9815031673290242,
"eval_f1": 0.7493440269320264,
"eval_loss": 0.06404220312833786,
"eval_precision": 0.7446620092492374,
"eval_recall": 0.7540852929453966,
"eval_runtime": 37.4983,
"eval_samples_per_second": 80.004,
"eval_steps_per_second": 5.014,
"step": 45000
},
{
"epoch": 1.28,
"learning_rate": 1.7244503028943237e-05,
"loss": 0.052,
"step": 45500
},
{
"epoch": 1.29,
"learning_rate": 1.710427417545434e-05,
"loss": 0.0507,
"step": 46000
},
{
"epoch": 1.29,
"eval_accuracy": 0.9804289494794884,
"eval_f1": 0.7446007529225283,
"eval_loss": 0.06634628772735596,
"eval_precision": 0.7403467297084318,
"eval_recall": 0.7489039457951375,
"eval_runtime": 37.6634,
"eval_samples_per_second": 79.653,
"eval_steps_per_second": 4.992,
"step": 46000
},
{
"epoch": 1.3,
"learning_rate": 1.6964045321965448e-05,
"loss": 0.048,
"step": 46500
},
{
"epoch": 1.32,
"learning_rate": 1.6823816468476552e-05,
"loss": 0.0496,
"step": 47000
},
{
"epoch": 1.32,
"eval_accuracy": 0.9806825202165033,
"eval_f1": 0.7495407833986993,
"eval_loss": 0.07511032372713089,
"eval_precision": 0.7469080835064806,
"eval_recall": 0.752192108409725,
"eval_runtime": 37.4863,
"eval_samples_per_second": 80.029,
"eval_steps_per_second": 5.015,
"step": 47000
},
{
"epoch": 1.33,
"learning_rate": 1.6683868072694638e-05,
"loss": 0.0479,
"step": 47500
},
{
"epoch": 1.35,
"learning_rate": 1.6543639219205746e-05,
"loss": 0.0482,
"step": 48000
},
{
"epoch": 1.35,
"eval_accuracy": 0.9812311550838627,
"eval_f1": 0.7515115472296561,
"eval_loss": 0.06654708087444305,
"eval_precision": 0.7475843028988365,
"eval_recall": 0.7554802710243125,
"eval_runtime": 37.3402,
"eval_samples_per_second": 80.342,
"eval_steps_per_second": 5.035,
"step": 48000
},
{
"epoch": 1.36,
"learning_rate": 1.6403690823423828e-05,
"loss": 0.0496,
"step": 48500
},
{
"epoch": 1.37,
"learning_rate": 1.6263461969934935e-05,
"loss": 0.0499,
"step": 49000
},
{
"epoch": 1.37,
"eval_accuracy": 0.9817290758038192,
"eval_f1": 0.7567942181080144,
"eval_loss": 0.06324990093708038,
"eval_precision": 0.7519921298573536,
"eval_recall": 0.7616580310880829,
"eval_runtime": 37.4694,
"eval_samples_per_second": 80.065,
"eval_steps_per_second": 5.017,
"step": 49000
},
{
"epoch": 1.39,
"learning_rate": 1.612323311644604e-05,
"loss": 0.0487,
"step": 49500
},
{
"epoch": 1.4,
"learning_rate": 1.5983004262957147e-05,
"loss": 0.0492,
"step": 50000
},
{
"epoch": 1.4,
"eval_accuracy": 0.9811158956579469,
"eval_f1": 0.752822961747003,
"eval_loss": 0.07030627131462097,
"eval_precision": 0.7516638521903248,
"eval_recall": 0.7539856516540454,
"eval_runtime": 37.5303,
"eval_samples_per_second": 79.935,
"eval_steps_per_second": 5.009,
"step": 50000
},
{
"epoch": 1.42,
"learning_rate": 1.584305586717523e-05,
"loss": 0.0503,
"step": 50500
},
{
"epoch": 1.43,
"learning_rate": 1.5702827013686337e-05,
"loss": 0.0496,
"step": 51000
},
{
"epoch": 1.43,
"eval_accuracy": 0.981719855049746,
"eval_f1": 0.7585012126911844,
"eval_loss": 0.06571871042251587,
"eval_precision": 0.7536146355857185,
"eval_recall": 0.7634515743324033,
"eval_runtime": 37.9295,
"eval_samples_per_second": 79.094,
"eval_steps_per_second": 4.957,
"step": 51000
},
{
"epoch": 1.44,
"learning_rate": 1.556259816019744e-05,
"loss": 0.0477,
"step": 51500
},
{
"epoch": 1.46,
"learning_rate": 1.542236930670855e-05,
"loss": 0.0499,
"step": 52000
},
{
"epoch": 1.46,
"eval_accuracy": 0.9822085550156292,
"eval_f1": 0.7642292490118576,
"eval_loss": 0.061449870467185974,
"eval_precision": 0.757938063504508,
"eval_recall": 0.7706257473096851,
"eval_runtime": 37.5922,
"eval_samples_per_second": 79.804,
"eval_steps_per_second": 5.001,
"step": 52000
},
{
"epoch": 1.47,
"learning_rate": 1.5282140453219652e-05,
"loss": 0.0475,
"step": 52500
},
{
"epoch": 1.49,
"learning_rate": 1.514191159973076e-05,
"loss": 0.0475,
"step": 53000
},
{
"epoch": 1.49,
"eval_accuracy": 0.9815538814764272,
"eval_f1": 0.7547375191727277,
"eval_loss": 0.06496689468622208,
"eval_precision": 0.7495823095823095,
"eval_recall": 0.7599641291351136,
"eval_runtime": 37.7023,
"eval_samples_per_second": 79.571,
"eval_steps_per_second": 4.986,
"step": 53000
},
{
"epoch": 1.5,
"learning_rate": 1.5001682746241866e-05,
"loss": 0.0515,
"step": 53500
},
{
"epoch": 1.51,
"learning_rate": 1.4861453892752973e-05,
"loss": 0.048,
"step": 54000
},
{
"epoch": 1.51,
"eval_accuracy": 0.9812219343297894,
"eval_f1": 0.7529961708687652,
"eval_loss": 0.06513578444719315,
"eval_precision": 0.7516132234686786,
"eval_recall": 0.75438421681945,
"eval_runtime": 37.4039,
"eval_samples_per_second": 80.205,
"eval_steps_per_second": 5.026,
"step": 54000
},
{
"epoch": 1.53,
"learning_rate": 1.4721225039264079e-05,
"loss": 0.0471,
"step": 54500
},
{
"epoch": 1.54,
"learning_rate": 1.4580996185775185e-05,
"loss": 0.0487,
"step": 55000
},
{
"epoch": 1.54,
"eval_accuracy": 0.9822085550156292,
"eval_f1": 0.7677227722772277,
"eval_loss": 0.061350539326667786,
"eval_precision": 0.7628886265249901,
"eval_recall": 0.7726185731367079,
"eval_runtime": 37.2631,
"eval_samples_per_second": 80.509,
"eval_steps_per_second": 5.045,
"step": 55000
},
{
"epoch": 1.56,
"learning_rate": 1.444076733228629e-05,
"loss": 0.0496,
"step": 55500
},
{
"epoch": 1.57,
"learning_rate": 1.4300538478797398e-05,
"loss": 0.0491,
"step": 56000
},
{
"epoch": 1.57,
"eval_accuracy": 0.9823653078348747,
"eval_f1": 0.7622540516429598,
"eval_loss": 0.06025264039635658,
"eval_precision": 0.7583078591854847,
"eval_recall": 0.7662415304902351,
"eval_runtime": 36.0146,
"eval_samples_per_second": 83.299,
"eval_steps_per_second": 5.22,
"step": 56000
}
],
"max_steps": 106968,
"num_train_epochs": 3,
"total_flos": 3.1285620016887974e+17,
"trial_name": null,
"trial_params": null
}