kotstantinovskii's picture
Upload checkpoint-15500/trainer_state.json
b474b3f
{
"best_metric": 1.0360217094421387,
"best_model_checkpoint": "./res_2/checkpoint-12000",
"epoch": 3.7685387794796985,
"global_step": 15500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 2.5e-05,
"loss": 4.4105,
"step": 250
},
{
"epoch": 0.06,
"eval_accuracy": 0.2648573215918387,
"eval_f1_score": 0.1461888457719469,
"eval_loss": 3.3562278747558594,
"eval_runtime": 104.4517,
"eval_samples_per_second": 198.953,
"eval_steps_per_second": 2.078,
"step": 250
},
{
"epoch": 0.12,
"learning_rate": 5e-05,
"loss": 2.7264,
"step": 500
},
{
"epoch": 0.12,
"eval_accuracy": 0.4484865983350176,
"eval_f1_score": 0.36937033203465997,
"eval_loss": 2.1725332736968994,
"eval_runtime": 104.6188,
"eval_samples_per_second": 198.635,
"eval_steps_per_second": 2.074,
"step": 500
},
{
"epoch": 0.18,
"learning_rate": 4.9216399197592786e-05,
"loss": 2.0303,
"step": 750
},
{
"epoch": 0.18,
"eval_accuracy": 0.5143640825754294,
"eval_f1_score": 0.4607925258467065,
"eval_loss": 1.784591555595398,
"eval_runtime": 104.5717,
"eval_samples_per_second": 198.725,
"eval_steps_per_second": 2.075,
"step": 750
},
{
"epoch": 0.24,
"learning_rate": 4.8432798395185555e-05,
"loss": 1.777,
"step": 1000
},
{
"epoch": 0.24,
"eval_accuracy": 0.5497329291179442,
"eval_f1_score": 0.5086586604390044,
"eval_loss": 1.6212031841278076,
"eval_runtime": 104.6796,
"eval_samples_per_second": 198.52,
"eval_steps_per_second": 2.073,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 4.764919759277834e-05,
"loss": 1.635,
"step": 1250
},
{
"epoch": 0.3,
"eval_accuracy": 0.57292719310909,
"eval_f1_score": 0.5363731366139944,
"eval_loss": 1.4894088506698608,
"eval_runtime": 104.782,
"eval_samples_per_second": 198.326,
"eval_steps_per_second": 2.071,
"step": 1250
},
{
"epoch": 0.36,
"learning_rate": 4.6865596790371115e-05,
"loss": 1.5492,
"step": 1500
},
{
"epoch": 0.36,
"eval_accuracy": 0.5871228526057456,
"eval_f1_score": 0.5565777587100311,
"eval_loss": 1.4159808158874512,
"eval_runtime": 104.9221,
"eval_samples_per_second": 198.061,
"eval_steps_per_second": 2.068,
"step": 1500
},
{
"epoch": 0.43,
"learning_rate": 4.608199598796389e-05,
"loss": 1.4853,
"step": 1750
},
{
"epoch": 0.43,
"eval_accuracy": 0.6005004571483566,
"eval_f1_score": 0.5743468856693701,
"eval_loss": 1.3664453029632568,
"eval_runtime": 105.1103,
"eval_samples_per_second": 197.707,
"eval_steps_per_second": 2.064,
"step": 1750
},
{
"epoch": 0.49,
"learning_rate": 4.5298395185556675e-05,
"loss": 1.4178,
"step": 2000
},
{
"epoch": 0.49,
"eval_accuracy": 0.6162841056734517,
"eval_f1_score": 0.5926712498507521,
"eval_loss": 1.3030893802642822,
"eval_runtime": 104.7919,
"eval_samples_per_second": 198.307,
"eval_steps_per_second": 2.071,
"step": 2000
},
{
"epoch": 0.55,
"learning_rate": 4.451479438314945e-05,
"loss": 1.4012,
"step": 2250
},
{
"epoch": 0.55,
"eval_accuracy": 0.609932149559694,
"eval_f1_score": 0.5892368884639717,
"eval_loss": 1.3101539611816406,
"eval_runtime": 105.0064,
"eval_samples_per_second": 197.902,
"eval_steps_per_second": 2.067,
"step": 2250
},
{
"epoch": 0.61,
"learning_rate": 4.373119358074223e-05,
"loss": 1.363,
"step": 2500
},
{
"epoch": 0.61,
"eval_accuracy": 0.6175833694239931,
"eval_f1_score": 0.596718851218216,
"eval_loss": 1.2853854894638062,
"eval_runtime": 104.962,
"eval_samples_per_second": 197.986,
"eval_steps_per_second": 2.067,
"step": 2500
},
{
"epoch": 0.67,
"learning_rate": 4.2947592778335004e-05,
"loss": 1.3349,
"step": 2750
},
{
"epoch": 0.67,
"eval_accuracy": 0.6279293585486743,
"eval_f1_score": 0.6094187627596529,
"eval_loss": 1.230573058128357,
"eval_runtime": 104.847,
"eval_samples_per_second": 198.203,
"eval_steps_per_second": 2.07,
"step": 2750
},
{
"epoch": 0.73,
"learning_rate": 4.216399197592779e-05,
"loss": 1.3324,
"step": 3000
},
{
"epoch": 0.73,
"eval_accuracy": 0.6324046003560945,
"eval_f1_score": 0.6161864856783154,
"eval_loss": 1.2122113704681396,
"eval_runtime": 105.0625,
"eval_samples_per_second": 197.796,
"eval_steps_per_second": 2.065,
"step": 3000
},
{
"epoch": 0.79,
"learning_rate": 4.1380391173520564e-05,
"loss": 1.292,
"step": 3250
},
{
"epoch": 0.79,
"eval_accuracy": 0.6309128530869544,
"eval_f1_score": 0.6119500434777825,
"eval_loss": 1.20195472240448,
"eval_runtime": 105.1108,
"eval_samples_per_second": 197.706,
"eval_steps_per_second": 2.064,
"step": 3250
},
{
"epoch": 0.85,
"learning_rate": 4.059679037111334e-05,
"loss": 1.2693,
"step": 3500
},
{
"epoch": 0.85,
"eval_accuracy": 0.6373129300803618,
"eval_f1_score": 0.624172345328055,
"eval_loss": 1.1805213689804077,
"eval_runtime": 104.8144,
"eval_samples_per_second": 198.265,
"eval_steps_per_second": 2.07,
"step": 3500
},
{
"epoch": 0.91,
"learning_rate": 3.9813189568706124e-05,
"loss": 1.2729,
"step": 3750
},
{
"epoch": 0.91,
"eval_accuracy": 0.6417881718877821,
"eval_f1_score": 0.6227824246172433,
"eval_loss": 1.1760473251342773,
"eval_runtime": 104.7877,
"eval_samples_per_second": 198.315,
"eval_steps_per_second": 2.071,
"step": 3750
},
{
"epoch": 0.97,
"learning_rate": 3.90295887662989e-05,
"loss": 1.2382,
"step": 4000
},
{
"epoch": 0.97,
"eval_accuracy": 0.6395746114238968,
"eval_f1_score": 0.6232000558005116,
"eval_loss": 1.1657843589782715,
"eval_runtime": 104.9017,
"eval_samples_per_second": 198.1,
"eval_steps_per_second": 2.069,
"step": 4000
},
{
"epoch": 1.03,
"learning_rate": 3.8245987963891677e-05,
"loss": 1.175,
"step": 4250
},
{
"epoch": 1.03,
"eval_accuracy": 0.6491025455945335,
"eval_f1_score": 0.6333235301999404,
"eval_loss": 1.1377447843551636,
"eval_runtime": 104.8132,
"eval_samples_per_second": 198.267,
"eval_steps_per_second": 2.07,
"step": 4250
},
{
"epoch": 1.09,
"learning_rate": 3.746238716148446e-05,
"loss": 1.1071,
"step": 4500
},
{
"epoch": 1.09,
"eval_accuracy": 0.6498243587892787,
"eval_f1_score": 0.6338669978734944,
"eval_loss": 1.134881854057312,
"eval_runtime": 104.9571,
"eval_samples_per_second": 197.995,
"eval_steps_per_second": 2.068,
"step": 4500
},
{
"epoch": 1.15,
"learning_rate": 3.6678786359077236e-05,
"loss": 1.1257,
"step": 4750
},
{
"epoch": 1.15,
"eval_accuracy": 0.6517491939752659,
"eval_f1_score": 0.6390275803026418,
"eval_loss": 1.1246434450149536,
"eval_runtime": 118.4939,
"eval_samples_per_second": 175.376,
"eval_steps_per_second": 1.831,
"step": 4750
},
{
"epoch": 1.22,
"learning_rate": 3.589518555667001e-05,
"loss": 1.0846,
"step": 5000
},
{
"epoch": 1.22,
"eval_accuracy": 0.6530484577258072,
"eval_f1_score": 0.6374295817702883,
"eval_loss": 1.134043574333191,
"eval_runtime": 104.8506,
"eval_samples_per_second": 198.196,
"eval_steps_per_second": 2.07,
"step": 5000
},
{
"epoch": 1.28,
"learning_rate": 3.511158475426279e-05,
"loss": 1.1093,
"step": 5250
},
{
"epoch": 1.28,
"eval_accuracy": 0.6527116115682595,
"eval_f1_score": 0.6433227186941691,
"eval_loss": 1.1218096017837524,
"eval_runtime": 105.0759,
"eval_samples_per_second": 197.771,
"eval_steps_per_second": 2.065,
"step": 5250
},
{
"epoch": 1.34,
"learning_rate": 3.4327983951855566e-05,
"loss": 1.0895,
"step": 5500
},
{
"epoch": 1.34,
"eval_accuracy": 0.6540108753188009,
"eval_f1_score": 0.640048503150472,
"eval_loss": 1.120483160018921,
"eval_runtime": 105.0798,
"eval_samples_per_second": 197.764,
"eval_steps_per_second": 2.065,
"step": 5500
},
{
"epoch": 1.4,
"learning_rate": 3.354438314944834e-05,
"loss": 1.0947,
"step": 5750
},
{
"epoch": 1.4,
"eval_accuracy": 0.6575236995332274,
"eval_f1_score": 0.6418486412023349,
"eval_loss": 1.1041547060012817,
"eval_runtime": 105.0665,
"eval_samples_per_second": 197.789,
"eval_steps_per_second": 2.065,
"step": 5750
},
{
"epoch": 1.46,
"learning_rate": 3.2760782347041125e-05,
"loss": 1.0784,
"step": 6000
},
{
"epoch": 1.46,
"eval_accuracy": 0.6592079303209663,
"eval_f1_score": 0.645978450692562,
"eval_loss": 1.0955352783203125,
"eval_runtime": 104.9867,
"eval_samples_per_second": 197.939,
"eval_steps_per_second": 2.067,
"step": 6000
},
{
"epoch": 1.52,
"learning_rate": 3.19771815446339e-05,
"loss": 1.0838,
"step": 6250
},
{
"epoch": 1.52,
"eval_accuracy": 0.6625763918964439,
"eval_f1_score": 0.6483699052640116,
"eval_loss": 1.095639944076538,
"eval_runtime": 104.9633,
"eval_samples_per_second": 197.983,
"eval_steps_per_second": 2.067,
"step": 6250
},
{
"epoch": 1.58,
"learning_rate": 3.119358074222668e-05,
"loss": 1.0764,
"step": 6500
},
{
"epoch": 1.58,
"eval_accuracy": 0.6629132380539916,
"eval_f1_score": 0.6497593784369542,
"eval_loss": 1.0892637968063354,
"eval_runtime": 105.0455,
"eval_samples_per_second": 197.829,
"eval_steps_per_second": 2.066,
"step": 6500
},
{
"epoch": 1.64,
"learning_rate": 3.040997993981946e-05,
"loss": 1.0563,
"step": 6750
},
{
"epoch": 1.64,
"eval_accuracy": 0.6619508204609981,
"eval_f1_score": 0.6484351513187849,
"eval_loss": 1.089970350265503,
"eval_runtime": 105.3681,
"eval_samples_per_second": 197.223,
"eval_steps_per_second": 2.059,
"step": 6750
},
{
"epoch": 1.7,
"learning_rate": 2.9626379137412235e-05,
"loss": 1.066,
"step": 7000
},
{
"epoch": 1.7,
"eval_accuracy": 0.6659929743515711,
"eval_f1_score": 0.6533335696048064,
"eval_loss": 1.0745749473571777,
"eval_runtime": 105.1237,
"eval_samples_per_second": 197.681,
"eval_steps_per_second": 2.064,
"step": 7000
},
{
"epoch": 1.76,
"learning_rate": 2.8842778335005018e-05,
"loss": 1.0603,
"step": 7250
},
{
"epoch": 1.76,
"eval_accuracy": 0.6680140512968578,
"eval_f1_score": 0.657494747880274,
"eval_loss": 1.0701572895050049,
"eval_runtime": 105.023,
"eval_samples_per_second": 197.871,
"eval_steps_per_second": 2.066,
"step": 7250
},
{
"epoch": 1.82,
"learning_rate": 2.8059177532597798e-05,
"loss": 1.0624,
"step": 7500
},
{
"epoch": 1.82,
"eval_accuracy": 0.6685915018526538,
"eval_f1_score": 0.6554959146242652,
"eval_loss": 1.0651373863220215,
"eval_runtime": 105.2005,
"eval_samples_per_second": 197.537,
"eval_steps_per_second": 2.063,
"step": 7500
},
{
"epoch": 1.88,
"learning_rate": 2.727557673019057e-05,
"loss": 1.0477,
"step": 7750
},
{
"epoch": 1.88,
"eval_accuracy": 0.6685915018526538,
"eval_f1_score": 0.655750687605972,
"eval_loss": 1.059809684753418,
"eval_runtime": 105.0102,
"eval_samples_per_second": 197.895,
"eval_steps_per_second": 2.066,
"step": 7750
},
{
"epoch": 1.95,
"learning_rate": 2.649197592778335e-05,
"loss": 1.0455,
"step": 8000
},
{
"epoch": 1.95,
"eval_accuracy": 0.6643087435638323,
"eval_f1_score": 0.6529621516807526,
"eval_loss": 1.0628999471664429,
"eval_runtime": 104.8979,
"eval_samples_per_second": 198.107,
"eval_steps_per_second": 2.069,
"step": 8000
},
{
"epoch": 2.01,
"learning_rate": 2.5708375125376127e-05,
"loss": 1.0403,
"step": 8250
},
{
"epoch": 2.01,
"eval_accuracy": 0.6683990183340551,
"eval_f1_score": 0.655431489724863,
"eval_loss": 1.0640465021133423,
"eval_runtime": 104.8293,
"eval_samples_per_second": 198.237,
"eval_steps_per_second": 2.07,
"step": 8250
},
{
"epoch": 2.07,
"learning_rate": 2.4924774322968907e-05,
"loss": 0.9083,
"step": 8500
},
{
"epoch": 2.07,
"eval_accuracy": 0.6707569414368895,
"eval_f1_score": 0.6605996988642956,
"eval_loss": 1.0650361776351929,
"eval_runtime": 104.9496,
"eval_samples_per_second": 198.009,
"eval_steps_per_second": 2.068,
"step": 8500
},
{
"epoch": 2.13,
"learning_rate": 2.4141173520561687e-05,
"loss": 0.9045,
"step": 8750
},
{
"epoch": 2.13,
"eval_accuracy": 0.6696501612049468,
"eval_f1_score": 0.6590246628667181,
"eval_loss": 1.0609605312347412,
"eval_runtime": 105.2135,
"eval_samples_per_second": 197.513,
"eval_steps_per_second": 2.062,
"step": 8750
},
{
"epoch": 2.19,
"learning_rate": 2.3357572718154463e-05,
"loss": 0.8998,
"step": 9000
},
{
"epoch": 2.19,
"eval_accuracy": 0.6709975458351378,
"eval_f1_score": 0.6609721373546371,
"eval_loss": 1.0593194961547852,
"eval_runtime": 104.8728,
"eval_samples_per_second": 198.154,
"eval_steps_per_second": 2.069,
"step": 9000
},
{
"epoch": 2.25,
"learning_rate": 2.2573971915747243e-05,
"loss": 0.8994,
"step": 9250
},
{
"epoch": 2.25,
"eval_accuracy": 0.6718156007891825,
"eval_f1_score": 0.6593984410226574,
"eval_loss": 1.064655065536499,
"eval_runtime": 104.9299,
"eval_samples_per_second": 198.046,
"eval_steps_per_second": 2.068,
"step": 9250
},
{
"epoch": 2.31,
"learning_rate": 2.1790371113340023e-05,
"loss": 0.9093,
"step": 9500
},
{
"epoch": 2.31,
"eval_accuracy": 0.668880227130552,
"eval_f1_score": 0.6589669423489957,
"eval_loss": 1.0653289556503296,
"eval_runtime": 105.0839,
"eval_samples_per_second": 197.756,
"eval_steps_per_second": 2.065,
"step": 9500
},
{
"epoch": 2.37,
"learning_rate": 2.10067703109328e-05,
"loss": 0.8864,
"step": 9750
},
{
"epoch": 2.37,
"eval_accuracy": 0.674895337086762,
"eval_f1_score": 0.664902640512849,
"eval_loss": 1.050571322441101,
"eval_runtime": 105.0155,
"eval_samples_per_second": 197.885,
"eval_steps_per_second": 2.066,
"step": 9750
},
{
"epoch": 2.43,
"learning_rate": 2.0223169508525576e-05,
"loss": 0.8905,
"step": 10000
},
{
"epoch": 2.43,
"eval_accuracy": 0.6742697656513161,
"eval_f1_score": 0.6644634718259311,
"eval_loss": 1.0541752576828003,
"eval_runtime": 104.7466,
"eval_samples_per_second": 198.393,
"eval_steps_per_second": 2.072,
"step": 10000
},
{
"epoch": 2.49,
"learning_rate": 1.9439568706118356e-05,
"loss": 0.8981,
"step": 10250
},
{
"epoch": 2.49,
"eval_accuracy": 0.6703238535200423,
"eval_f1_score": 0.6590618499027966,
"eval_loss": 1.0549697875976562,
"eval_runtime": 105.0016,
"eval_samples_per_second": 197.911,
"eval_steps_per_second": 2.067,
"step": 10250
},
{
"epoch": 2.55,
"learning_rate": 1.8655967903711136e-05,
"loss": 0.9038,
"step": 10500
},
{
"epoch": 2.55,
"eval_accuracy": 0.671286271113036,
"eval_f1_score": 0.6622994563067591,
"eval_loss": 1.047540307044983,
"eval_runtime": 105.0268,
"eval_samples_per_second": 197.864,
"eval_steps_per_second": 2.066,
"step": 10500
},
{
"epoch": 2.61,
"learning_rate": 1.7872367101303912e-05,
"loss": 0.8884,
"step": 10750
},
{
"epoch": 2.61,
"eval_accuracy": 0.6739329194937683,
"eval_f1_score": 0.6640063633487168,
"eval_loss": 1.0482571125030518,
"eval_runtime": 104.9667,
"eval_samples_per_second": 197.977,
"eval_steps_per_second": 2.067,
"step": 10750
},
{
"epoch": 2.67,
"learning_rate": 1.7088766298896692e-05,
"loss": 0.8993,
"step": 11000
},
{
"epoch": 2.67,
"eval_accuracy": 0.6732592271786728,
"eval_f1_score": 0.6644247957468762,
"eval_loss": 1.0455670356750488,
"eval_runtime": 104.9075,
"eval_samples_per_second": 198.089,
"eval_steps_per_second": 2.068,
"step": 11000
},
{
"epoch": 2.74,
"learning_rate": 1.630516549648947e-05,
"loss": 0.8881,
"step": 11250
},
{
"epoch": 2.74,
"eval_accuracy": 0.6758577546797555,
"eval_f1_score": 0.6663120997306459,
"eval_loss": 1.0385903120040894,
"eval_runtime": 104.9411,
"eval_samples_per_second": 198.025,
"eval_steps_per_second": 2.068,
"step": 11250
},
{
"epoch": 2.8,
"learning_rate": 1.552156469408225e-05,
"loss": 0.8774,
"step": 11500
},
{
"epoch": 2.8,
"eval_accuracy": 0.677590106347144,
"eval_f1_score": 0.6679638313459818,
"eval_loss": 1.0412800312042236,
"eval_runtime": 104.9104,
"eval_samples_per_second": 198.083,
"eval_steps_per_second": 2.068,
"step": 11500
},
{
"epoch": 2.86,
"learning_rate": 1.4737963891675025e-05,
"loss": 0.8725,
"step": 11750
},
{
"epoch": 2.86,
"eval_accuracy": 0.6772051393099466,
"eval_f1_score": 0.6684538458032429,
"eval_loss": 1.041609287261963,
"eval_runtime": 104.9692,
"eval_samples_per_second": 197.972,
"eval_steps_per_second": 2.067,
"step": 11750
},
{
"epoch": 2.92,
"learning_rate": 1.3954363089267805e-05,
"loss": 0.8814,
"step": 12000
},
{
"epoch": 2.92,
"eval_accuracy": 0.6780231942639912,
"eval_f1_score": 0.668403494357387,
"eval_loss": 1.0360217094421387,
"eval_runtime": 104.9925,
"eval_samples_per_second": 197.929,
"eval_steps_per_second": 2.067,
"step": 12000
},
{
"epoch": 2.98,
"learning_rate": 1.3170762286860583e-05,
"loss": 0.8707,
"step": 12250
},
{
"epoch": 2.98,
"eval_accuracy": 0.6776382272267937,
"eval_f1_score": 0.6678552195861074,
"eval_loss": 1.036238193511963,
"eval_runtime": 104.9928,
"eval_samples_per_second": 197.928,
"eval_steps_per_second": 2.067,
"step": 12250
},
{
"epoch": 3.04,
"learning_rate": 1.2387161484453361e-05,
"loss": 0.803,
"step": 12500
},
{
"epoch": 3.04,
"eval_accuracy": 0.678504403060488,
"eval_f1_score": 0.6693071368875216,
"eval_loss": 1.048478364944458,
"eval_runtime": 104.9691,
"eval_samples_per_second": 197.973,
"eval_steps_per_second": 2.067,
"step": 12500
},
{
"epoch": 3.1,
"learning_rate": 1.160356068204614e-05,
"loss": 0.7647,
"step": 12750
},
{
"epoch": 3.1,
"eval_accuracy": 0.6773976228285453,
"eval_f1_score": 0.667836848673267,
"eval_loss": 1.0551421642303467,
"eval_runtime": 104.9474,
"eval_samples_per_second": 198.013,
"eval_steps_per_second": 2.068,
"step": 12750
},
{
"epoch": 3.16,
"learning_rate": 1.0819959879638917e-05,
"loss": 0.7595,
"step": 13000
},
{
"epoch": 3.16,
"eval_accuracy": 0.6766758096338001,
"eval_f1_score": 0.6681691840022692,
"eval_loss": 1.0496253967285156,
"eval_runtime": 104.8372,
"eval_samples_per_second": 198.222,
"eval_steps_per_second": 2.07,
"step": 13000
},
{
"epoch": 3.22,
"learning_rate": 1.0036359077231696e-05,
"loss": 0.7628,
"step": 13250
},
{
"epoch": 3.22,
"eval_accuracy": 0.6778788316250421,
"eval_f1_score": 0.6698784049603582,
"eval_loss": 1.0489078760147095,
"eval_runtime": 104.8411,
"eval_samples_per_second": 198.214,
"eval_steps_per_second": 2.07,
"step": 13250
},
{
"epoch": 3.28,
"learning_rate": 9.252758274824474e-06,
"loss": 0.7711,
"step": 13500
},
{
"epoch": 3.28,
"eval_accuracy": 0.6778307107453925,
"eval_f1_score": 0.670537104393122,
"eval_loss": 1.0493261814117432,
"eval_runtime": 104.9371,
"eval_samples_per_second": 198.033,
"eval_steps_per_second": 2.068,
"step": 13500
},
{
"epoch": 3.34,
"learning_rate": 8.469157472417252e-06,
"loss": 0.7659,
"step": 13750
},
{
"epoch": 3.34,
"eval_accuracy": 0.677445743708195,
"eval_f1_score": 0.6690005925034824,
"eval_loss": 1.0476195812225342,
"eval_runtime": 104.8587,
"eval_samples_per_second": 198.181,
"eval_steps_per_second": 2.069,
"step": 13750
},
{
"epoch": 3.4,
"learning_rate": 7.68555667001003e-06,
"loss": 0.7466,
"step": 14000
},
{
"epoch": 3.4,
"eval_accuracy": 0.6767720513930995,
"eval_f1_score": 0.6682514083974397,
"eval_loss": 1.0503697395324707,
"eval_runtime": 104.8988,
"eval_samples_per_second": 198.105,
"eval_steps_per_second": 2.069,
"step": 14000
},
{
"epoch": 3.46,
"learning_rate": 6.901955867602809e-06,
"loss": 0.7438,
"step": 14250
},
{
"epoch": 3.46,
"eval_accuracy": 0.6778788316250421,
"eval_f1_score": 0.6704394720403158,
"eval_loss": 1.0488700866699219,
"eval_runtime": 104.8654,
"eval_samples_per_second": 198.168,
"eval_steps_per_second": 2.069,
"step": 14250
},
{
"epoch": 3.53,
"learning_rate": 6.118355065195587e-06,
"loss": 0.7492,
"step": 14500
},
{
"epoch": 3.53,
"eval_accuracy": 0.6762908425966027,
"eval_f1_score": 0.668875699854832,
"eval_loss": 1.0487704277038574,
"eval_runtime": 105.1614,
"eval_samples_per_second": 197.611,
"eval_steps_per_second": 2.063,
"step": 14500
},
{
"epoch": 3.59,
"learning_rate": 5.334754262788365e-06,
"loss": 0.7534,
"step": 14750
},
{
"epoch": 3.59,
"eval_accuracy": 0.6767239305134498,
"eval_f1_score": 0.6693352825093323,
"eval_loss": 1.0485919713974,
"eval_runtime": 105.4042,
"eval_samples_per_second": 197.155,
"eval_steps_per_second": 2.059,
"step": 14750
},
{
"epoch": 3.65,
"learning_rate": 4.551153460381144e-06,
"loss": 0.7391,
"step": 15000
},
{
"epoch": 3.65,
"eval_accuracy": 0.6791299744959338,
"eval_f1_score": 0.6716788527298224,
"eval_loss": 1.0460212230682373,
"eval_runtime": 105.024,
"eval_samples_per_second": 197.869,
"eval_steps_per_second": 2.066,
"step": 15000
},
{
"epoch": 3.71,
"learning_rate": 3.7675526579739217e-06,
"loss": 0.7543,
"step": 15250
},
{
"epoch": 3.71,
"eval_accuracy": 0.6804773591261248,
"eval_f1_score": 0.6726007691455318,
"eval_loss": 1.045469880104065,
"eval_runtime": 104.6353,
"eval_samples_per_second": 198.604,
"eval_steps_per_second": 2.074,
"step": 15250
},
{
"epoch": 3.77,
"learning_rate": 2.9839518555667003e-06,
"loss": 0.7382,
"step": 15500
},
{
"epoch": 3.77,
"eval_accuracy": 0.6802848756075262,
"eval_f1_score": 0.6728396464392151,
"eval_loss": 1.0436148643493652,
"eval_runtime": 105.0033,
"eval_samples_per_second": 197.908,
"eval_steps_per_second": 2.067,
"step": 15500
}
],
"max_steps": 16452,
"num_train_epochs": 4,
"total_flos": 1.9763710480077926e+17,
"trial_name": null,
"trial_params": null
}