Spaces:
Runtime error
Runtime error
{ | |
"best_metric": 1.0360217094421387, | |
"best_model_checkpoint": "./res_2/checkpoint-12000", | |
"epoch": 3.7685387794796985, | |
"global_step": 15500, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.06, | |
"learning_rate": 2.5e-05, | |
"loss": 4.4105, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.06, | |
"eval_accuracy": 0.2648573215918387, | |
"eval_f1_score": 0.1461888457719469, | |
"eval_loss": 3.3562278747558594, | |
"eval_runtime": 104.4517, | |
"eval_samples_per_second": 198.953, | |
"eval_steps_per_second": 2.078, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 2.7264, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.12, | |
"eval_accuracy": 0.4484865983350176, | |
"eval_f1_score": 0.36937033203465997, | |
"eval_loss": 2.1725332736968994, | |
"eval_runtime": 104.6188, | |
"eval_samples_per_second": 198.635, | |
"eval_steps_per_second": 2.074, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 4.9216399197592786e-05, | |
"loss": 2.0303, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.18, | |
"eval_accuracy": 0.5143640825754294, | |
"eval_f1_score": 0.4607925258467065, | |
"eval_loss": 1.784591555595398, | |
"eval_runtime": 104.5717, | |
"eval_samples_per_second": 198.725, | |
"eval_steps_per_second": 2.075, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 4.8432798395185555e-05, | |
"loss": 1.777, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.24, | |
"eval_accuracy": 0.5497329291179442, | |
"eval_f1_score": 0.5086586604390044, | |
"eval_loss": 1.6212031841278076, | |
"eval_runtime": 104.6796, | |
"eval_samples_per_second": 198.52, | |
"eval_steps_per_second": 2.073, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.3, | |
"learning_rate": 4.764919759277834e-05, | |
"loss": 1.635, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 0.3, | |
"eval_accuracy": 0.57292719310909, | |
"eval_f1_score": 0.5363731366139944, | |
"eval_loss": 1.4894088506698608, | |
"eval_runtime": 104.782, | |
"eval_samples_per_second": 198.326, | |
"eval_steps_per_second": 2.071, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 0.36, | |
"learning_rate": 4.6865596790371115e-05, | |
"loss": 1.5492, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.36, | |
"eval_accuracy": 0.5871228526057456, | |
"eval_f1_score": 0.5565777587100311, | |
"eval_loss": 1.4159808158874512, | |
"eval_runtime": 104.9221, | |
"eval_samples_per_second": 198.061, | |
"eval_steps_per_second": 2.068, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.43, | |
"learning_rate": 4.608199598796389e-05, | |
"loss": 1.4853, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 0.43, | |
"eval_accuracy": 0.6005004571483566, | |
"eval_f1_score": 0.5743468856693701, | |
"eval_loss": 1.3664453029632568, | |
"eval_runtime": 105.1103, | |
"eval_samples_per_second": 197.707, | |
"eval_steps_per_second": 2.064, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 0.49, | |
"learning_rate": 4.5298395185556675e-05, | |
"loss": 1.4178, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.49, | |
"eval_accuracy": 0.6162841056734517, | |
"eval_f1_score": 0.5926712498507521, | |
"eval_loss": 1.3030893802642822, | |
"eval_runtime": 104.7919, | |
"eval_samples_per_second": 198.307, | |
"eval_steps_per_second": 2.071, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.55, | |
"learning_rate": 4.451479438314945e-05, | |
"loss": 1.4012, | |
"step": 2250 | |
}, | |
{ | |
"epoch": 0.55, | |
"eval_accuracy": 0.609932149559694, | |
"eval_f1_score": 0.5892368884639717, | |
"eval_loss": 1.3101539611816406, | |
"eval_runtime": 105.0064, | |
"eval_samples_per_second": 197.902, | |
"eval_steps_per_second": 2.067, | |
"step": 2250 | |
}, | |
{ | |
"epoch": 0.61, | |
"learning_rate": 4.373119358074223e-05, | |
"loss": 1.363, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 0.61, | |
"eval_accuracy": 0.6175833694239931, | |
"eval_f1_score": 0.596718851218216, | |
"eval_loss": 1.2853854894638062, | |
"eval_runtime": 104.962, | |
"eval_samples_per_second": 197.986, | |
"eval_steps_per_second": 2.067, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 0.67, | |
"learning_rate": 4.2947592778335004e-05, | |
"loss": 1.3349, | |
"step": 2750 | |
}, | |
{ | |
"epoch": 0.67, | |
"eval_accuracy": 0.6279293585486743, | |
"eval_f1_score": 0.6094187627596529, | |
"eval_loss": 1.230573058128357, | |
"eval_runtime": 104.847, | |
"eval_samples_per_second": 198.203, | |
"eval_steps_per_second": 2.07, | |
"step": 2750 | |
}, | |
{ | |
"epoch": 0.73, | |
"learning_rate": 4.216399197592779e-05, | |
"loss": 1.3324, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 0.73, | |
"eval_accuracy": 0.6324046003560945, | |
"eval_f1_score": 0.6161864856783154, | |
"eval_loss": 1.2122113704681396, | |
"eval_runtime": 105.0625, | |
"eval_samples_per_second": 197.796, | |
"eval_steps_per_second": 2.065, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 0.79, | |
"learning_rate": 4.1380391173520564e-05, | |
"loss": 1.292, | |
"step": 3250 | |
}, | |
{ | |
"epoch": 0.79, | |
"eval_accuracy": 0.6309128530869544, | |
"eval_f1_score": 0.6119500434777825, | |
"eval_loss": 1.20195472240448, | |
"eval_runtime": 105.1108, | |
"eval_samples_per_second": 197.706, | |
"eval_steps_per_second": 2.064, | |
"step": 3250 | |
}, | |
{ | |
"epoch": 0.85, | |
"learning_rate": 4.059679037111334e-05, | |
"loss": 1.2693, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 0.85, | |
"eval_accuracy": 0.6373129300803618, | |
"eval_f1_score": 0.624172345328055, | |
"eval_loss": 1.1805213689804077, | |
"eval_runtime": 104.8144, | |
"eval_samples_per_second": 198.265, | |
"eval_steps_per_second": 2.07, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 0.91, | |
"learning_rate": 3.9813189568706124e-05, | |
"loss": 1.2729, | |
"step": 3750 | |
}, | |
{ | |
"epoch": 0.91, | |
"eval_accuracy": 0.6417881718877821, | |
"eval_f1_score": 0.6227824246172433, | |
"eval_loss": 1.1760473251342773, | |
"eval_runtime": 104.7877, | |
"eval_samples_per_second": 198.315, | |
"eval_steps_per_second": 2.071, | |
"step": 3750 | |
}, | |
{ | |
"epoch": 0.97, | |
"learning_rate": 3.90295887662989e-05, | |
"loss": 1.2382, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 0.97, | |
"eval_accuracy": 0.6395746114238968, | |
"eval_f1_score": 0.6232000558005116, | |
"eval_loss": 1.1657843589782715, | |
"eval_runtime": 104.9017, | |
"eval_samples_per_second": 198.1, | |
"eval_steps_per_second": 2.069, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 1.03, | |
"learning_rate": 3.8245987963891677e-05, | |
"loss": 1.175, | |
"step": 4250 | |
}, | |
{ | |
"epoch": 1.03, | |
"eval_accuracy": 0.6491025455945335, | |
"eval_f1_score": 0.6333235301999404, | |
"eval_loss": 1.1377447843551636, | |
"eval_runtime": 104.8132, | |
"eval_samples_per_second": 198.267, | |
"eval_steps_per_second": 2.07, | |
"step": 4250 | |
}, | |
{ | |
"epoch": 1.09, | |
"learning_rate": 3.746238716148446e-05, | |
"loss": 1.1071, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 1.09, | |
"eval_accuracy": 0.6498243587892787, | |
"eval_f1_score": 0.6338669978734944, | |
"eval_loss": 1.134881854057312, | |
"eval_runtime": 104.9571, | |
"eval_samples_per_second": 197.995, | |
"eval_steps_per_second": 2.068, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 1.15, | |
"learning_rate": 3.6678786359077236e-05, | |
"loss": 1.1257, | |
"step": 4750 | |
}, | |
{ | |
"epoch": 1.15, | |
"eval_accuracy": 0.6517491939752659, | |
"eval_f1_score": 0.6390275803026418, | |
"eval_loss": 1.1246434450149536, | |
"eval_runtime": 118.4939, | |
"eval_samples_per_second": 175.376, | |
"eval_steps_per_second": 1.831, | |
"step": 4750 | |
}, | |
{ | |
"epoch": 1.22, | |
"learning_rate": 3.589518555667001e-05, | |
"loss": 1.0846, | |
"step": 5000 | |
}, | |
{ | |
"epoch": 1.22, | |
"eval_accuracy": 0.6530484577258072, | |
"eval_f1_score": 0.6374295817702883, | |
"eval_loss": 1.134043574333191, | |
"eval_runtime": 104.8506, | |
"eval_samples_per_second": 198.196, | |
"eval_steps_per_second": 2.07, | |
"step": 5000 | |
}, | |
{ | |
"epoch": 1.28, | |
"learning_rate": 3.511158475426279e-05, | |
"loss": 1.1093, | |
"step": 5250 | |
}, | |
{ | |
"epoch": 1.28, | |
"eval_accuracy": 0.6527116115682595, | |
"eval_f1_score": 0.6433227186941691, | |
"eval_loss": 1.1218096017837524, | |
"eval_runtime": 105.0759, | |
"eval_samples_per_second": 197.771, | |
"eval_steps_per_second": 2.065, | |
"step": 5250 | |
}, | |
{ | |
"epoch": 1.34, | |
"learning_rate": 3.4327983951855566e-05, | |
"loss": 1.0895, | |
"step": 5500 | |
}, | |
{ | |
"epoch": 1.34, | |
"eval_accuracy": 0.6540108753188009, | |
"eval_f1_score": 0.640048503150472, | |
"eval_loss": 1.120483160018921, | |
"eval_runtime": 105.0798, | |
"eval_samples_per_second": 197.764, | |
"eval_steps_per_second": 2.065, | |
"step": 5500 | |
}, | |
{ | |
"epoch": 1.4, | |
"learning_rate": 3.354438314944834e-05, | |
"loss": 1.0947, | |
"step": 5750 | |
}, | |
{ | |
"epoch": 1.4, | |
"eval_accuracy": 0.6575236995332274, | |
"eval_f1_score": 0.6418486412023349, | |
"eval_loss": 1.1041547060012817, | |
"eval_runtime": 105.0665, | |
"eval_samples_per_second": 197.789, | |
"eval_steps_per_second": 2.065, | |
"step": 5750 | |
}, | |
{ | |
"epoch": 1.46, | |
"learning_rate": 3.2760782347041125e-05, | |
"loss": 1.0784, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 1.46, | |
"eval_accuracy": 0.6592079303209663, | |
"eval_f1_score": 0.645978450692562, | |
"eval_loss": 1.0955352783203125, | |
"eval_runtime": 104.9867, | |
"eval_samples_per_second": 197.939, | |
"eval_steps_per_second": 2.067, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 1.52, | |
"learning_rate": 3.19771815446339e-05, | |
"loss": 1.0838, | |
"step": 6250 | |
}, | |
{ | |
"epoch": 1.52, | |
"eval_accuracy": 0.6625763918964439, | |
"eval_f1_score": 0.6483699052640116, | |
"eval_loss": 1.095639944076538, | |
"eval_runtime": 104.9633, | |
"eval_samples_per_second": 197.983, | |
"eval_steps_per_second": 2.067, | |
"step": 6250 | |
}, | |
{ | |
"epoch": 1.58, | |
"learning_rate": 3.119358074222668e-05, | |
"loss": 1.0764, | |
"step": 6500 | |
}, | |
{ | |
"epoch": 1.58, | |
"eval_accuracy": 0.6629132380539916, | |
"eval_f1_score": 0.6497593784369542, | |
"eval_loss": 1.0892637968063354, | |
"eval_runtime": 105.0455, | |
"eval_samples_per_second": 197.829, | |
"eval_steps_per_second": 2.066, | |
"step": 6500 | |
}, | |
{ | |
"epoch": 1.64, | |
"learning_rate": 3.040997993981946e-05, | |
"loss": 1.0563, | |
"step": 6750 | |
}, | |
{ | |
"epoch": 1.64, | |
"eval_accuracy": 0.6619508204609981, | |
"eval_f1_score": 0.6484351513187849, | |
"eval_loss": 1.089970350265503, | |
"eval_runtime": 105.3681, | |
"eval_samples_per_second": 197.223, | |
"eval_steps_per_second": 2.059, | |
"step": 6750 | |
}, | |
{ | |
"epoch": 1.7, | |
"learning_rate": 2.9626379137412235e-05, | |
"loss": 1.066, | |
"step": 7000 | |
}, | |
{ | |
"epoch": 1.7, | |
"eval_accuracy": 0.6659929743515711, | |
"eval_f1_score": 0.6533335696048064, | |
"eval_loss": 1.0745749473571777, | |
"eval_runtime": 105.1237, | |
"eval_samples_per_second": 197.681, | |
"eval_steps_per_second": 2.064, | |
"step": 7000 | |
}, | |
{ | |
"epoch": 1.76, | |
"learning_rate": 2.8842778335005018e-05, | |
"loss": 1.0603, | |
"step": 7250 | |
}, | |
{ | |
"epoch": 1.76, | |
"eval_accuracy": 0.6680140512968578, | |
"eval_f1_score": 0.657494747880274, | |
"eval_loss": 1.0701572895050049, | |
"eval_runtime": 105.023, | |
"eval_samples_per_second": 197.871, | |
"eval_steps_per_second": 2.066, | |
"step": 7250 | |
}, | |
{ | |
"epoch": 1.82, | |
"learning_rate": 2.8059177532597798e-05, | |
"loss": 1.0624, | |
"step": 7500 | |
}, | |
{ | |
"epoch": 1.82, | |
"eval_accuracy": 0.6685915018526538, | |
"eval_f1_score": 0.6554959146242652, | |
"eval_loss": 1.0651373863220215, | |
"eval_runtime": 105.2005, | |
"eval_samples_per_second": 197.537, | |
"eval_steps_per_second": 2.063, | |
"step": 7500 | |
}, | |
{ | |
"epoch": 1.88, | |
"learning_rate": 2.727557673019057e-05, | |
"loss": 1.0477, | |
"step": 7750 | |
}, | |
{ | |
"epoch": 1.88, | |
"eval_accuracy": 0.6685915018526538, | |
"eval_f1_score": 0.655750687605972, | |
"eval_loss": 1.059809684753418, | |
"eval_runtime": 105.0102, | |
"eval_samples_per_second": 197.895, | |
"eval_steps_per_second": 2.066, | |
"step": 7750 | |
}, | |
{ | |
"epoch": 1.95, | |
"learning_rate": 2.649197592778335e-05, | |
"loss": 1.0455, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 1.95, | |
"eval_accuracy": 0.6643087435638323, | |
"eval_f1_score": 0.6529621516807526, | |
"eval_loss": 1.0628999471664429, | |
"eval_runtime": 104.8979, | |
"eval_samples_per_second": 198.107, | |
"eval_steps_per_second": 2.069, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 2.01, | |
"learning_rate": 2.5708375125376127e-05, | |
"loss": 1.0403, | |
"step": 8250 | |
}, | |
{ | |
"epoch": 2.01, | |
"eval_accuracy": 0.6683990183340551, | |
"eval_f1_score": 0.655431489724863, | |
"eval_loss": 1.0640465021133423, | |
"eval_runtime": 104.8293, | |
"eval_samples_per_second": 198.237, | |
"eval_steps_per_second": 2.07, | |
"step": 8250 | |
}, | |
{ | |
"epoch": 2.07, | |
"learning_rate": 2.4924774322968907e-05, | |
"loss": 0.9083, | |
"step": 8500 | |
}, | |
{ | |
"epoch": 2.07, | |
"eval_accuracy": 0.6707569414368895, | |
"eval_f1_score": 0.6605996988642956, | |
"eval_loss": 1.0650361776351929, | |
"eval_runtime": 104.9496, | |
"eval_samples_per_second": 198.009, | |
"eval_steps_per_second": 2.068, | |
"step": 8500 | |
}, | |
{ | |
"epoch": 2.13, | |
"learning_rate": 2.4141173520561687e-05, | |
"loss": 0.9045, | |
"step": 8750 | |
}, | |
{ | |
"epoch": 2.13, | |
"eval_accuracy": 0.6696501612049468, | |
"eval_f1_score": 0.6590246628667181, | |
"eval_loss": 1.0609605312347412, | |
"eval_runtime": 105.2135, | |
"eval_samples_per_second": 197.513, | |
"eval_steps_per_second": 2.062, | |
"step": 8750 | |
}, | |
{ | |
"epoch": 2.19, | |
"learning_rate": 2.3357572718154463e-05, | |
"loss": 0.8998, | |
"step": 9000 | |
}, | |
{ | |
"epoch": 2.19, | |
"eval_accuracy": 0.6709975458351378, | |
"eval_f1_score": 0.6609721373546371, | |
"eval_loss": 1.0593194961547852, | |
"eval_runtime": 104.8728, | |
"eval_samples_per_second": 198.154, | |
"eval_steps_per_second": 2.069, | |
"step": 9000 | |
}, | |
{ | |
"epoch": 2.25, | |
"learning_rate": 2.2573971915747243e-05, | |
"loss": 0.8994, | |
"step": 9250 | |
}, | |
{ | |
"epoch": 2.25, | |
"eval_accuracy": 0.6718156007891825, | |
"eval_f1_score": 0.6593984410226574, | |
"eval_loss": 1.064655065536499, | |
"eval_runtime": 104.9299, | |
"eval_samples_per_second": 198.046, | |
"eval_steps_per_second": 2.068, | |
"step": 9250 | |
}, | |
{ | |
"epoch": 2.31, | |
"learning_rate": 2.1790371113340023e-05, | |
"loss": 0.9093, | |
"step": 9500 | |
}, | |
{ | |
"epoch": 2.31, | |
"eval_accuracy": 0.668880227130552, | |
"eval_f1_score": 0.6589669423489957, | |
"eval_loss": 1.0653289556503296, | |
"eval_runtime": 105.0839, | |
"eval_samples_per_second": 197.756, | |
"eval_steps_per_second": 2.065, | |
"step": 9500 | |
}, | |
{ | |
"epoch": 2.37, | |
"learning_rate": 2.10067703109328e-05, | |
"loss": 0.8864, | |
"step": 9750 | |
}, | |
{ | |
"epoch": 2.37, | |
"eval_accuracy": 0.674895337086762, | |
"eval_f1_score": 0.664902640512849, | |
"eval_loss": 1.050571322441101, | |
"eval_runtime": 105.0155, | |
"eval_samples_per_second": 197.885, | |
"eval_steps_per_second": 2.066, | |
"step": 9750 | |
}, | |
{ | |
"epoch": 2.43, | |
"learning_rate": 2.0223169508525576e-05, | |
"loss": 0.8905, | |
"step": 10000 | |
}, | |
{ | |
"epoch": 2.43, | |
"eval_accuracy": 0.6742697656513161, | |
"eval_f1_score": 0.6644634718259311, | |
"eval_loss": 1.0541752576828003, | |
"eval_runtime": 104.7466, | |
"eval_samples_per_second": 198.393, | |
"eval_steps_per_second": 2.072, | |
"step": 10000 | |
}, | |
{ | |
"epoch": 2.49, | |
"learning_rate": 1.9439568706118356e-05, | |
"loss": 0.8981, | |
"step": 10250 | |
}, | |
{ | |
"epoch": 2.49, | |
"eval_accuracy": 0.6703238535200423, | |
"eval_f1_score": 0.6590618499027966, | |
"eval_loss": 1.0549697875976562, | |
"eval_runtime": 105.0016, | |
"eval_samples_per_second": 197.911, | |
"eval_steps_per_second": 2.067, | |
"step": 10250 | |
}, | |
{ | |
"epoch": 2.55, | |
"learning_rate": 1.8655967903711136e-05, | |
"loss": 0.9038, | |
"step": 10500 | |
}, | |
{ | |
"epoch": 2.55, | |
"eval_accuracy": 0.671286271113036, | |
"eval_f1_score": 0.6622994563067591, | |
"eval_loss": 1.047540307044983, | |
"eval_runtime": 105.0268, | |
"eval_samples_per_second": 197.864, | |
"eval_steps_per_second": 2.066, | |
"step": 10500 | |
}, | |
{ | |
"epoch": 2.61, | |
"learning_rate": 1.7872367101303912e-05, | |
"loss": 0.8884, | |
"step": 10750 | |
}, | |
{ | |
"epoch": 2.61, | |
"eval_accuracy": 0.6739329194937683, | |
"eval_f1_score": 0.6640063633487168, | |
"eval_loss": 1.0482571125030518, | |
"eval_runtime": 104.9667, | |
"eval_samples_per_second": 197.977, | |
"eval_steps_per_second": 2.067, | |
"step": 10750 | |
}, | |
{ | |
"epoch": 2.67, | |
"learning_rate": 1.7088766298896692e-05, | |
"loss": 0.8993, | |
"step": 11000 | |
}, | |
{ | |
"epoch": 2.67, | |
"eval_accuracy": 0.6732592271786728, | |
"eval_f1_score": 0.6644247957468762, | |
"eval_loss": 1.0455670356750488, | |
"eval_runtime": 104.9075, | |
"eval_samples_per_second": 198.089, | |
"eval_steps_per_second": 2.068, | |
"step": 11000 | |
}, | |
{ | |
"epoch": 2.74, | |
"learning_rate": 1.630516549648947e-05, | |
"loss": 0.8881, | |
"step": 11250 | |
}, | |
{ | |
"epoch": 2.74, | |
"eval_accuracy": 0.6758577546797555, | |
"eval_f1_score": 0.6663120997306459, | |
"eval_loss": 1.0385903120040894, | |
"eval_runtime": 104.9411, | |
"eval_samples_per_second": 198.025, | |
"eval_steps_per_second": 2.068, | |
"step": 11250 | |
}, | |
{ | |
"epoch": 2.8, | |
"learning_rate": 1.552156469408225e-05, | |
"loss": 0.8774, | |
"step": 11500 | |
}, | |
{ | |
"epoch": 2.8, | |
"eval_accuracy": 0.677590106347144, | |
"eval_f1_score": 0.6679638313459818, | |
"eval_loss": 1.0412800312042236, | |
"eval_runtime": 104.9104, | |
"eval_samples_per_second": 198.083, | |
"eval_steps_per_second": 2.068, | |
"step": 11500 | |
}, | |
{ | |
"epoch": 2.86, | |
"learning_rate": 1.4737963891675025e-05, | |
"loss": 0.8725, | |
"step": 11750 | |
}, | |
{ | |
"epoch": 2.86, | |
"eval_accuracy": 0.6772051393099466, | |
"eval_f1_score": 0.6684538458032429, | |
"eval_loss": 1.041609287261963, | |
"eval_runtime": 104.9692, | |
"eval_samples_per_second": 197.972, | |
"eval_steps_per_second": 2.067, | |
"step": 11750 | |
}, | |
{ | |
"epoch": 2.92, | |
"learning_rate": 1.3954363089267805e-05, | |
"loss": 0.8814, | |
"step": 12000 | |
}, | |
{ | |
"epoch": 2.92, | |
"eval_accuracy": 0.6780231942639912, | |
"eval_f1_score": 0.668403494357387, | |
"eval_loss": 1.0360217094421387, | |
"eval_runtime": 104.9925, | |
"eval_samples_per_second": 197.929, | |
"eval_steps_per_second": 2.067, | |
"step": 12000 | |
}, | |
{ | |
"epoch": 2.98, | |
"learning_rate": 1.3170762286860583e-05, | |
"loss": 0.8707, | |
"step": 12250 | |
}, | |
{ | |
"epoch": 2.98, | |
"eval_accuracy": 0.6776382272267937, | |
"eval_f1_score": 0.6678552195861074, | |
"eval_loss": 1.036238193511963, | |
"eval_runtime": 104.9928, | |
"eval_samples_per_second": 197.928, | |
"eval_steps_per_second": 2.067, | |
"step": 12250 | |
}, | |
{ | |
"epoch": 3.04, | |
"learning_rate": 1.2387161484453361e-05, | |
"loss": 0.803, | |
"step": 12500 | |
}, | |
{ | |
"epoch": 3.04, | |
"eval_accuracy": 0.678504403060488, | |
"eval_f1_score": 0.6693071368875216, | |
"eval_loss": 1.048478364944458, | |
"eval_runtime": 104.9691, | |
"eval_samples_per_second": 197.973, | |
"eval_steps_per_second": 2.067, | |
"step": 12500 | |
}, | |
{ | |
"epoch": 3.1, | |
"learning_rate": 1.160356068204614e-05, | |
"loss": 0.7647, | |
"step": 12750 | |
}, | |
{ | |
"epoch": 3.1, | |
"eval_accuracy": 0.6773976228285453, | |
"eval_f1_score": 0.667836848673267, | |
"eval_loss": 1.0551421642303467, | |
"eval_runtime": 104.9474, | |
"eval_samples_per_second": 198.013, | |
"eval_steps_per_second": 2.068, | |
"step": 12750 | |
}, | |
{ | |
"epoch": 3.16, | |
"learning_rate": 1.0819959879638917e-05, | |
"loss": 0.7595, | |
"step": 13000 | |
}, | |
{ | |
"epoch": 3.16, | |
"eval_accuracy": 0.6766758096338001, | |
"eval_f1_score": 0.6681691840022692, | |
"eval_loss": 1.0496253967285156, | |
"eval_runtime": 104.8372, | |
"eval_samples_per_second": 198.222, | |
"eval_steps_per_second": 2.07, | |
"step": 13000 | |
}, | |
{ | |
"epoch": 3.22, | |
"learning_rate": 1.0036359077231696e-05, | |
"loss": 0.7628, | |
"step": 13250 | |
}, | |
{ | |
"epoch": 3.22, | |
"eval_accuracy": 0.6778788316250421, | |
"eval_f1_score": 0.6698784049603582, | |
"eval_loss": 1.0489078760147095, | |
"eval_runtime": 104.8411, | |
"eval_samples_per_second": 198.214, | |
"eval_steps_per_second": 2.07, | |
"step": 13250 | |
}, | |
{ | |
"epoch": 3.28, | |
"learning_rate": 9.252758274824474e-06, | |
"loss": 0.7711, | |
"step": 13500 | |
}, | |
{ | |
"epoch": 3.28, | |
"eval_accuracy": 0.6778307107453925, | |
"eval_f1_score": 0.670537104393122, | |
"eval_loss": 1.0493261814117432, | |
"eval_runtime": 104.9371, | |
"eval_samples_per_second": 198.033, | |
"eval_steps_per_second": 2.068, | |
"step": 13500 | |
}, | |
{ | |
"epoch": 3.34, | |
"learning_rate": 8.469157472417252e-06, | |
"loss": 0.7659, | |
"step": 13750 | |
}, | |
{ | |
"epoch": 3.34, | |
"eval_accuracy": 0.677445743708195, | |
"eval_f1_score": 0.6690005925034824, | |
"eval_loss": 1.0476195812225342, | |
"eval_runtime": 104.8587, | |
"eval_samples_per_second": 198.181, | |
"eval_steps_per_second": 2.069, | |
"step": 13750 | |
}, | |
{ | |
"epoch": 3.4, | |
"learning_rate": 7.68555667001003e-06, | |
"loss": 0.7466, | |
"step": 14000 | |
}, | |
{ | |
"epoch": 3.4, | |
"eval_accuracy": 0.6767720513930995, | |
"eval_f1_score": 0.6682514083974397, | |
"eval_loss": 1.0503697395324707, | |
"eval_runtime": 104.8988, | |
"eval_samples_per_second": 198.105, | |
"eval_steps_per_second": 2.069, | |
"step": 14000 | |
}, | |
{ | |
"epoch": 3.46, | |
"learning_rate": 6.901955867602809e-06, | |
"loss": 0.7438, | |
"step": 14250 | |
}, | |
{ | |
"epoch": 3.46, | |
"eval_accuracy": 0.6778788316250421, | |
"eval_f1_score": 0.6704394720403158, | |
"eval_loss": 1.0488700866699219, | |
"eval_runtime": 104.8654, | |
"eval_samples_per_second": 198.168, | |
"eval_steps_per_second": 2.069, | |
"step": 14250 | |
}, | |
{ | |
"epoch": 3.53, | |
"learning_rate": 6.118355065195587e-06, | |
"loss": 0.7492, | |
"step": 14500 | |
}, | |
{ | |
"epoch": 3.53, | |
"eval_accuracy": 0.6762908425966027, | |
"eval_f1_score": 0.668875699854832, | |
"eval_loss": 1.0487704277038574, | |
"eval_runtime": 105.1614, | |
"eval_samples_per_second": 197.611, | |
"eval_steps_per_second": 2.063, | |
"step": 14500 | |
}, | |
{ | |
"epoch": 3.59, | |
"learning_rate": 5.334754262788365e-06, | |
"loss": 0.7534, | |
"step": 14750 | |
}, | |
{ | |
"epoch": 3.59, | |
"eval_accuracy": 0.6767239305134498, | |
"eval_f1_score": 0.6693352825093323, | |
"eval_loss": 1.0485919713974, | |
"eval_runtime": 105.4042, | |
"eval_samples_per_second": 197.155, | |
"eval_steps_per_second": 2.059, | |
"step": 14750 | |
}, | |
{ | |
"epoch": 3.65, | |
"learning_rate": 4.551153460381144e-06, | |
"loss": 0.7391, | |
"step": 15000 | |
}, | |
{ | |
"epoch": 3.65, | |
"eval_accuracy": 0.6791299744959338, | |
"eval_f1_score": 0.6716788527298224, | |
"eval_loss": 1.0460212230682373, | |
"eval_runtime": 105.024, | |
"eval_samples_per_second": 197.869, | |
"eval_steps_per_second": 2.066, | |
"step": 15000 | |
}, | |
{ | |
"epoch": 3.71, | |
"learning_rate": 3.7675526579739217e-06, | |
"loss": 0.7543, | |
"step": 15250 | |
}, | |
{ | |
"epoch": 3.71, | |
"eval_accuracy": 0.6804773591261248, | |
"eval_f1_score": 0.6726007691455318, | |
"eval_loss": 1.045469880104065, | |
"eval_runtime": 104.6353, | |
"eval_samples_per_second": 198.604, | |
"eval_steps_per_second": 2.074, | |
"step": 15250 | |
}, | |
{ | |
"epoch": 3.77, | |
"learning_rate": 2.9839518555667003e-06, | |
"loss": 0.7382, | |
"step": 15500 | |
}, | |
{ | |
"epoch": 3.77, | |
"eval_accuracy": 0.6802848756075262, | |
"eval_f1_score": 0.6728396464392151, | |
"eval_loss": 1.0436148643493652, | |
"eval_runtime": 105.0033, | |
"eval_samples_per_second": 197.908, | |
"eval_steps_per_second": 2.067, | |
"step": 15500 | |
} | |
], | |
"max_steps": 16452, | |
"num_train_epochs": 4, | |
"total_flos": 1.9763710480077926e+17, | |
"trial_name": null, | |
"trial_params": null | |
} | |