{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.44, "eval_loss": 0.3608391582965851, "eval_runtime": 2.9715, "eval_samples_per_second": 33.653, "eval_steps_per_second": 4.375, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.44, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.57, "eval_loss": 0.2890351712703705, "eval_runtime": 3.0216, "eval_samples_per_second": 33.095, "eval_steps_per_second": 4.302, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.57, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.58, "eval_loss": 0.29608333110809326, "eval_runtime": 3.0821, "eval_samples_per_second": 32.446, "eval_steps_per_second": 4.218, "step": 75 }, { "best_epoch": 2, "best_eval_accuracy": 0.58, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.65, "eval_loss": 0.28651857376098633, "eval_runtime": 3.1117, "eval_samples_per_second": 32.137, "eval_steps_per_second": 4.178, "step": 100 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.58, "eval_loss": 0.2900591790676117, "eval_runtime": 3.136, "eval_samples_per_second": 31.888, "eval_steps_per_second": 4.145, "step": 125 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.46, "eval_loss": 0.2933403253555298, "eval_runtime": 3.1448, "eval_samples_per_second": 31.798, "eval_steps_per_second": 4.134, "step": 150 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.64, "eval_loss": 0.32912155985832214, "eval_runtime": 3.1478, "eval_samples_per_second": 31.768, "eval_steps_per_second": 4.13, "step": 175 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.62, "eval_loss": 0.28637343645095825, "eval_runtime": 3.1555, "eval_samples_per_second": 31.69, "eval_steps_per_second": 4.12, "step": 200 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.42, "eval_loss": 0.29787373542785645, "eval_runtime": 3.1584, "eval_samples_per_second": 31.662, "eval_steps_per_second": 4.116, "step": 225 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.63, "eval_loss": 0.3034938871860504, "eval_runtime": 3.1614, "eval_samples_per_second": 31.632, "eval_steps_per_second": 4.112, "step": 250 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.59, "eval_loss": 0.29021456837654114, "eval_runtime": 3.1628, "eval_samples_per_second": 31.618, "eval_steps_per_second": 4.11, "step": 275 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.5, "eval_loss": 0.2916863262653351, "eval_runtime": 3.1631, "eval_samples_per_second": 31.615, "eval_steps_per_second": 4.11, "step": 300 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.44, "eval_loss": 0.29346537590026855, "eval_runtime": 3.1644, "eval_samples_per_second": 31.601, "eval_steps_per_second": 4.108, "step": 325 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.44, "eval_loss": 0.3056786060333252, "eval_runtime": 3.1659, "eval_samples_per_second": 31.586, "eval_steps_per_second": 4.106, "step": 350 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.45, "eval_loss": 0.2980119287967682, "eval_runtime": 3.1656, "eval_samples_per_second": 31.589, "eval_steps_per_second": 4.107, "step": 375 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.47, "eval_loss": 0.2946815490722656, "eval_runtime": 3.166, "eval_samples_per_second": 31.585, "eval_steps_per_second": 4.106, "step": 400 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.5, "eval_loss": 0.29449501633644104, "eval_runtime": 3.1641, "eval_samples_per_second": 31.605, "eval_steps_per_second": 4.109, "step": 425 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.49, "eval_loss": 0.2923860251903534, "eval_runtime": 3.1667, "eval_samples_per_second": 31.579, "eval_steps_per_second": 4.105, "step": 450 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.55, "eval_loss": 0.2921885848045349, "eval_runtime": 3.1694, "eval_samples_per_second": 31.552, "eval_steps_per_second": 4.102, "step": 475 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.037500000000000006, "loss": 1.1902, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.45, "eval_loss": 0.292321115732193, "eval_runtime": 3.1784, "eval_samples_per_second": 31.462, "eval_steps_per_second": 4.09, "step": 500 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.55, "eval_loss": 0.2864084541797638, "eval_runtime": 3.1692, "eval_samples_per_second": 31.554, "eval_steps_per_second": 4.102, "step": 525 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.42, "eval_loss": 0.2925306558609009, "eval_runtime": 3.1691, "eval_samples_per_second": 31.555, "eval_steps_per_second": 4.102, "step": 550 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.58, "eval_loss": 0.2909621000289917, "eval_runtime": 3.1683, "eval_samples_per_second": 31.563, "eval_steps_per_second": 4.103, "step": 575 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.58, "eval_loss": 0.28951704502105713, "eval_runtime": 3.1698, "eval_samples_per_second": 31.548, "eval_steps_per_second": 4.101, "step": 600 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.62, "eval_loss": 0.2918489873409271, "eval_runtime": 3.1538, "eval_samples_per_second": 31.708, "eval_steps_per_second": 4.122, "step": 625 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.42, "eval_loss": 0.2921474874019623, "eval_runtime": 3.1532, "eval_samples_per_second": 31.714, "eval_steps_per_second": 4.123, "step": 650 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.58, "eval_loss": 0.29183533787727356, "eval_runtime": 3.153, "eval_samples_per_second": 31.716, "eval_steps_per_second": 4.123, "step": 675 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.6, "eval_loss": 0.2910160720348358, "eval_runtime": 3.1555, "eval_samples_per_second": 31.691, "eval_steps_per_second": 4.12, "step": 700 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.57, "eval_loss": 0.29186198115348816, "eval_runtime": 3.1611, "eval_samples_per_second": 31.635, "eval_steps_per_second": 4.113, "step": 725 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.48, "eval_loss": 0.2920497953891754, "eval_runtime": 3.1584, "eval_samples_per_second": 31.662, "eval_steps_per_second": 4.116, "step": 750 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.41, "eval_loss": 0.2922162115573883, "eval_runtime": 3.1557, "eval_samples_per_second": 31.688, "eval_steps_per_second": 4.119, "step": 775 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.53, "eval_loss": 0.2919849157333374, "eval_runtime": 3.1524, "eval_samples_per_second": 31.722, "eval_steps_per_second": 4.124, "step": 800 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.51, "eval_loss": 0.29196542501449585, "eval_runtime": 3.1533, "eval_samples_per_second": 31.713, "eval_steps_per_second": 4.123, "step": 825 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.54, "eval_loss": 0.29190951585769653, "eval_runtime": 3.1549, "eval_samples_per_second": 31.697, "eval_steps_per_second": 4.121, "step": 850 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.52, "eval_loss": 0.2919704020023346, "eval_runtime": 3.1601, "eval_samples_per_second": 31.645, "eval_steps_per_second": 4.114, "step": 875 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.39, "eval_loss": 0.29210489988327026, "eval_runtime": 3.1561, "eval_samples_per_second": 31.684, "eval_steps_per_second": 4.119, "step": 900 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.53, "eval_loss": 0.2919726073741913, "eval_runtime": 3.155, "eval_samples_per_second": 31.696, "eval_steps_per_second": 4.12, "step": 925 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.49, "eval_loss": 0.2919931411743164, "eval_runtime": 3.1566, "eval_samples_per_second": 31.68, "eval_steps_per_second": 4.118, "step": 950 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.4, "eval_loss": 0.29224902391433716, "eval_runtime": 3.1538, "eval_samples_per_second": 31.708, "eval_steps_per_second": 4.122, "step": 975 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.025, "loss": 0.8276, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.58, "eval_loss": 0.2919129729270935, "eval_runtime": 3.1515, "eval_samples_per_second": 31.731, "eval_steps_per_second": 4.125, "step": 1000 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.62, "eval_loss": 0.2918228805065155, "eval_runtime": 3.156, "eval_samples_per_second": 31.685, "eval_steps_per_second": 4.119, "step": 1025 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.61, "eval_loss": 0.2918192744255066, "eval_runtime": 3.153, "eval_samples_per_second": 31.716, "eval_steps_per_second": 4.123, "step": 1050 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.42, "eval_loss": 0.2921864688396454, "eval_runtime": 3.1516, "eval_samples_per_second": 31.73, "eval_steps_per_second": 4.125, "step": 1075 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.43, "eval_loss": 0.2921425700187683, "eval_runtime": 3.1517, "eval_samples_per_second": 31.729, "eval_steps_per_second": 4.125, "step": 1100 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.42, "eval_loss": 0.29200559854507446, "eval_runtime": 3.1538, "eval_samples_per_second": 31.708, "eval_steps_per_second": 4.122, "step": 1125 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.42, "eval_loss": 0.29199379682540894, "eval_runtime": 3.1521, "eval_samples_per_second": 31.725, "eval_steps_per_second": 4.124, "step": 1150 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.35, "eval_loss": 0.29200825095176697, "eval_runtime": 3.1518, "eval_samples_per_second": 31.728, "eval_steps_per_second": 4.125, "step": 1175 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.54, "eval_loss": 0.291986346244812, "eval_runtime": 3.1552, "eval_samples_per_second": 31.693, "eval_steps_per_second": 4.12, "step": 1200 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.6, "eval_loss": 0.29196932911872864, "eval_runtime": 3.1536, "eval_samples_per_second": 31.709, "eval_steps_per_second": 4.122, "step": 1225 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.52, "eval_loss": 0.2919679880142212, "eval_runtime": 3.1556, "eval_samples_per_second": 31.69, "eval_steps_per_second": 4.12, "step": 1250 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.37, "eval_loss": 0.2920134961605072, "eval_runtime": 3.1538, "eval_samples_per_second": 31.707, "eval_steps_per_second": 4.122, "step": 1275 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.45, "eval_loss": 0.2920159101486206, "eval_runtime": 3.1519, "eval_samples_per_second": 31.727, "eval_steps_per_second": 4.125, "step": 1300 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.44, "eval_loss": 0.2919909358024597, "eval_runtime": 3.1539, "eval_samples_per_second": 31.707, "eval_steps_per_second": 4.122, "step": 1325 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.59, "eval_loss": 0.29197338223457336, "eval_runtime": 3.1515, "eval_samples_per_second": 31.731, "eval_steps_per_second": 4.125, "step": 1350 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.44, "eval_loss": 0.29198840260505676, "eval_runtime": 3.1535, "eval_samples_per_second": 31.71, "eval_steps_per_second": 4.122, "step": 1375 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.58, "eval_loss": 0.2919729948043823, "eval_runtime": 3.1547, "eval_samples_per_second": 31.699, "eval_steps_per_second": 4.121, "step": 1400 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.57, "eval_loss": 0.29197487235069275, "eval_runtime": 3.1506, "eval_samples_per_second": 31.739, "eval_steps_per_second": 4.126, "step": 1425 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.46, "eval_loss": 0.2920148968696594, "eval_runtime": 3.1554, "eval_samples_per_second": 31.692, "eval_steps_per_second": 4.12, "step": 1450 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.42, "eval_loss": 0.29200279712677, "eval_runtime": 3.1551, "eval_samples_per_second": 31.694, "eval_steps_per_second": 4.12, "step": 1475 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.0125, "loss": 0.6389, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.37, "eval_loss": 0.2919899523258209, "eval_runtime": 3.1542, "eval_samples_per_second": 31.704, "eval_steps_per_second": 4.121, "step": 1500 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.6, "eval_loss": 0.29192057251930237, "eval_runtime": 3.1549, "eval_samples_per_second": 31.697, "eval_steps_per_second": 4.121, "step": 1525 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.6, "eval_loss": 0.29190564155578613, "eval_runtime": 3.1549, "eval_samples_per_second": 31.696, "eval_steps_per_second": 4.121, "step": 1550 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.55, "eval_loss": 0.29197636246681213, "eval_runtime": 3.1562, "eval_samples_per_second": 31.684, "eval_steps_per_second": 4.119, "step": 1575 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.52, "eval_loss": 0.2919798195362091, "eval_runtime": 3.1523, "eval_samples_per_second": 31.723, "eval_steps_per_second": 4.124, "step": 1600 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.5, "eval_loss": 0.2919777035713196, "eval_runtime": 3.157, "eval_samples_per_second": 31.676, "eval_steps_per_second": 4.118, "step": 1625 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.36, "eval_loss": 0.2920242249965668, "eval_runtime": 3.1564, "eval_samples_per_second": 31.682, "eval_steps_per_second": 4.119, "step": 1650 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.58, "eval_loss": 0.29195553064346313, "eval_runtime": 3.1547, "eval_samples_per_second": 31.698, "eval_steps_per_second": 4.121, "step": 1675 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.38, "eval_loss": 0.29199790954589844, "eval_runtime": 3.1555, "eval_samples_per_second": 31.69, "eval_steps_per_second": 4.12, "step": 1700 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.58, "eval_loss": 0.2919797897338867, "eval_runtime": 3.1523, "eval_samples_per_second": 31.723, "eval_steps_per_second": 4.124, "step": 1725 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.53, "eval_loss": 0.2919650375843048, "eval_runtime": 3.1536, "eval_samples_per_second": 31.71, "eval_steps_per_second": 4.122, "step": 1750 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.37, "eval_loss": 0.2920193374156952, "eval_runtime": 3.1551, "eval_samples_per_second": 31.695, "eval_steps_per_second": 4.12, "step": 1775 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.39, "eval_loss": 0.2920474112033844, "eval_runtime": 3.1506, "eval_samples_per_second": 31.74, "eval_steps_per_second": 4.126, "step": 1800 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.36, "eval_loss": 0.29199421405792236, "eval_runtime": 3.1521, "eval_samples_per_second": 31.724, "eval_steps_per_second": 4.124, "step": 1825 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.43, "eval_loss": 0.29198718070983887, "eval_runtime": 3.1542, "eval_samples_per_second": 31.703, "eval_steps_per_second": 4.121, "step": 1850 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.38, "eval_loss": 0.29200947284698486, "eval_runtime": 3.1537, "eval_samples_per_second": 31.709, "eval_steps_per_second": 4.122, "step": 1875 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.43, "eval_loss": 0.29198354482650757, "eval_runtime": 3.1536, "eval_samples_per_second": 31.71, "eval_steps_per_second": 4.122, "step": 1900 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.37, "eval_loss": 0.2920118570327759, "eval_runtime": 3.152, "eval_samples_per_second": 31.726, "eval_steps_per_second": 4.124, "step": 1925 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.37, "eval_loss": 0.29201093316078186, "eval_runtime": 3.1508, "eval_samples_per_second": 31.738, "eval_steps_per_second": 4.126, "step": 1950 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.38, "eval_loss": 0.2919941246509552, "eval_runtime": 3.1558, "eval_samples_per_second": 31.688, "eval_steps_per_second": 4.119, "step": 1975 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.5225, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.4, "eval_loss": 0.2919914722442627, "eval_runtime": 3.1525, "eval_samples_per_second": 31.721, "eval_steps_per_second": 4.124, "step": 2000 }, { "best_epoch": 3, "best_eval_accuracy": 0.65, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.7947869720458984, "train_runtime": 1692.4229, "train_samples_per_second": 18.908, "train_steps_per_second": 1.182 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }