{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.36, "eval_loss": 0.6900678277015686, "eval_runtime": 2.9568, "eval_samples_per_second": 33.82, "eval_steps_per_second": 4.397, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.36, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.56, "eval_loss": 0.7172253131866455, "eval_runtime": 3.0289, "eval_samples_per_second": 33.016, "eval_steps_per_second": 4.292, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.56, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.58, "eval_loss": 0.6264107823371887, "eval_runtime": 3.0763, "eval_samples_per_second": 32.507, "eval_steps_per_second": 4.226, "step": 75 }, { "best_epoch": 2, "best_eval_accuracy": 0.58, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.61, "eval_loss": 0.5863716006278992, "eval_runtime": 3.1066, "eval_samples_per_second": 32.189, "eval_steps_per_second": 4.185, "step": 100 }, { "best_epoch": 3, "best_eval_accuracy": 0.61, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.47, "eval_loss": 0.5716881155967712, "eval_runtime": 3.1333, "eval_samples_per_second": 31.915, "eval_steps_per_second": 4.149, "step": 125 }, { "best_epoch": 3, "best_eval_accuracy": 0.61, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.4, "eval_loss": 0.6748393774032593, "eval_runtime": 3.1391, "eval_samples_per_second": 31.856, "eval_steps_per_second": 4.141, "step": 150 }, { "best_epoch": 3, "best_eval_accuracy": 0.61, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.66, "eval_loss": 0.5271764993667603, "eval_runtime": 3.1483, "eval_samples_per_second": 31.763, "eval_steps_per_second": 4.129, "step": 175 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.64, "eval_loss": 0.565096914768219, "eval_runtime": 3.1481, "eval_samples_per_second": 31.765, "eval_steps_per_second": 4.129, "step": 200 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.65, "eval_loss": 0.578468918800354, "eval_runtime": 3.1568, "eval_samples_per_second": 31.677, "eval_steps_per_second": 4.118, "step": 225 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.65, "eval_loss": 0.5772669911384583, "eval_runtime": 3.1558, "eval_samples_per_second": 31.688, "eval_steps_per_second": 4.119, "step": 250 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.65, "eval_loss": 0.5286800265312195, "eval_runtime": 3.1451, "eval_samples_per_second": 31.795, "eval_steps_per_second": 4.133, "step": 275 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.64, "eval_loss": 0.5612473487854004, "eval_runtime": 3.1564, "eval_samples_per_second": 31.682, "eval_steps_per_second": 4.119, "step": 300 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.66, "eval_loss": 0.5733940005302429, "eval_runtime": 3.1466, "eval_samples_per_second": 31.78, "eval_steps_per_second": 4.131, "step": 325 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.65, "eval_loss": 0.5195974111557007, "eval_runtime": 3.1431, "eval_samples_per_second": 31.815, "eval_steps_per_second": 4.136, "step": 350 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.66, "eval_loss": 0.5490823984146118, "eval_runtime": 3.1425, "eval_samples_per_second": 31.821, "eval_steps_per_second": 4.137, "step": 375 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.63, "eval_loss": 0.5136759281158447, "eval_runtime": 3.1402, "eval_samples_per_second": 31.845, "eval_steps_per_second": 4.14, "step": 400 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.67, "eval_loss": 0.533294141292572, "eval_runtime": 3.1412, "eval_samples_per_second": 31.835, "eval_steps_per_second": 4.139, "step": 425 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.66, "eval_loss": 0.5518142580986023, "eval_runtime": 3.1392, "eval_samples_per_second": 31.855, "eval_steps_per_second": 4.141, "step": 450 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.66, "eval_loss": 0.5221859216690063, "eval_runtime": 3.1382, "eval_samples_per_second": 31.865, "eval_steps_per_second": 4.142, "step": 475 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.0075, "loss": 0.7077, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.67, "eval_loss": 0.4975990355014801, "eval_runtime": 3.1365, "eval_samples_per_second": 31.883, "eval_steps_per_second": 4.145, "step": 500 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.67, "eval_loss": 0.4995178282260895, "eval_runtime": 3.1409, "eval_samples_per_second": 31.838, "eval_steps_per_second": 4.139, "step": 525 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.65, "eval_loss": 0.5836873650550842, "eval_runtime": 3.1414, "eval_samples_per_second": 31.833, "eval_steps_per_second": 4.138, "step": 550 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.62, "eval_loss": 0.5800585150718689, "eval_runtime": 3.1419, "eval_samples_per_second": 31.828, "eval_steps_per_second": 4.138, "step": 575 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.63, "eval_loss": 0.5376842617988586, "eval_runtime": 3.1413, "eval_samples_per_second": 31.834, "eval_steps_per_second": 4.138, "step": 600 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.63, "eval_loss": 0.5508807897567749, "eval_runtime": 3.1403, "eval_samples_per_second": 31.844, "eval_steps_per_second": 4.14, "step": 625 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.67, "eval_loss": 0.5863495469093323, "eval_runtime": 3.1415, "eval_samples_per_second": 31.831, "eval_steps_per_second": 4.138, "step": 650 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.65, "eval_loss": 0.5980372428894043, "eval_runtime": 3.144, "eval_samples_per_second": 31.806, "eval_steps_per_second": 4.135, "step": 675 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.67, "eval_loss": 0.6481872797012329, "eval_runtime": 3.1402, "eval_samples_per_second": 31.845, "eval_steps_per_second": 4.14, "step": 700 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.66, "eval_loss": 0.5850781798362732, "eval_runtime": 3.144, "eval_samples_per_second": 31.807, "eval_steps_per_second": 4.135, "step": 725 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.67, "eval_loss": 0.6651497483253479, "eval_runtime": 3.1422, "eval_samples_per_second": 31.825, "eval_steps_per_second": 4.137, "step": 750 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.69, "eval_loss": 0.5496576428413391, "eval_runtime": 3.1415, "eval_samples_per_second": 31.832, "eval_steps_per_second": 4.138, "step": 775 }, { "best_epoch": 30, "best_eval_accuracy": 0.69, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.72, "eval_loss": 0.5907086133956909, "eval_runtime": 3.146, "eval_samples_per_second": 31.787, "eval_steps_per_second": 4.132, "step": 800 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.68, "eval_loss": 0.5805070400238037, "eval_runtime": 3.1454, "eval_samples_per_second": 31.792, "eval_steps_per_second": 4.133, "step": 825 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.69, "eval_loss": 0.5844395160675049, "eval_runtime": 3.1444, "eval_samples_per_second": 31.803, "eval_steps_per_second": 4.134, "step": 850 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.69, "eval_loss": 0.5749659538269043, "eval_runtime": 3.1443, "eval_samples_per_second": 31.803, "eval_steps_per_second": 4.134, "step": 875 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.7, "eval_loss": 0.6175033450126648, "eval_runtime": 3.1415, "eval_samples_per_second": 31.832, "eval_steps_per_second": 4.138, "step": 900 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.68, "eval_loss": 0.5753803849220276, "eval_runtime": 3.1437, "eval_samples_per_second": 31.809, "eval_steps_per_second": 4.135, "step": 925 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.69, "eval_loss": 0.575753390789032, "eval_runtime": 3.1433, "eval_samples_per_second": 31.814, "eval_steps_per_second": 4.136, "step": 950 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.69, "eval_loss": 0.6012661457061768, "eval_runtime": 3.1421, "eval_samples_per_second": 31.826, "eval_steps_per_second": 4.137, "step": 975 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.005, "loss": 0.4491, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.68, "eval_loss": 0.5383523106575012, "eval_runtime": 3.1442, "eval_samples_per_second": 31.805, "eval_steps_per_second": 4.135, "step": 1000 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.7, "eval_loss": 0.5930788516998291, "eval_runtime": 3.1439, "eval_samples_per_second": 31.807, "eval_steps_per_second": 4.135, "step": 1025 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.7, "eval_loss": 0.6029644012451172, "eval_runtime": 3.1452, "eval_samples_per_second": 31.794, "eval_steps_per_second": 4.133, "step": 1050 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.67, "eval_loss": 0.5629833936691284, "eval_runtime": 3.1473, "eval_samples_per_second": 31.773, "eval_steps_per_second": 4.13, "step": 1075 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.67, "eval_loss": 0.5599313974380493, "eval_runtime": 3.146, "eval_samples_per_second": 31.787, "eval_steps_per_second": 4.132, "step": 1100 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.66, "eval_loss": 0.5798678994178772, "eval_runtime": 3.1489, "eval_samples_per_second": 31.757, "eval_steps_per_second": 4.128, "step": 1125 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.69, "eval_loss": 0.5544764995574951, "eval_runtime": 3.1471, "eval_samples_per_second": 31.775, "eval_steps_per_second": 4.131, "step": 1150 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.68, "eval_loss": 0.5642539262771606, "eval_runtime": 3.1464, "eval_samples_per_second": 31.782, "eval_steps_per_second": 4.132, "step": 1175 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.7, "eval_loss": 0.584467887878418, "eval_runtime": 3.149, "eval_samples_per_second": 31.757, "eval_steps_per_second": 4.128, "step": 1200 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.69, "eval_loss": 0.5780852437019348, "eval_runtime": 3.1469, "eval_samples_per_second": 31.777, "eval_steps_per_second": 4.131, "step": 1225 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.7, "eval_loss": 0.5623089671134949, "eval_runtime": 3.1534, "eval_samples_per_second": 31.712, "eval_steps_per_second": 4.123, "step": 1250 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.69, "eval_loss": 0.5528396368026733, "eval_runtime": 3.1495, "eval_samples_per_second": 31.751, "eval_steps_per_second": 4.128, "step": 1275 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.71, "eval_loss": 0.544218122959137, "eval_runtime": 3.1486, "eval_samples_per_second": 31.76, "eval_steps_per_second": 4.129, "step": 1300 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.69, "eval_loss": 0.5498414039611816, "eval_runtime": 3.1488, "eval_samples_per_second": 31.759, "eval_steps_per_second": 4.129, "step": 1325 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.7, "eval_loss": 0.5390793681144714, "eval_runtime": 3.1542, "eval_samples_per_second": 31.704, "eval_steps_per_second": 4.121, "step": 1350 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.71, "eval_loss": 0.5569570660591125, "eval_runtime": 3.154, "eval_samples_per_second": 31.706, "eval_steps_per_second": 4.122, "step": 1375 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.71, "eval_loss": 0.5728991031646729, "eval_runtime": 3.1411, "eval_samples_per_second": 31.836, "eval_steps_per_second": 4.139, "step": 1400 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.72, "eval_loss": 0.5351668000221252, "eval_runtime": 3.1381, "eval_samples_per_second": 31.866, "eval_steps_per_second": 4.143, "step": 1425 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.7, "eval_loss": 0.5537936687469482, "eval_runtime": 3.1402, "eval_samples_per_second": 31.845, "eval_steps_per_second": 4.14, "step": 1450 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.71, "eval_loss": 0.5563415884971619, "eval_runtime": 3.137, "eval_samples_per_second": 31.878, "eval_steps_per_second": 4.144, "step": 1475 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.0025, "loss": 0.3353, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.71, "eval_loss": 0.5703515410423279, "eval_runtime": 3.1319, "eval_samples_per_second": 31.93, "eval_steps_per_second": 4.151, "step": 1500 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.7, "eval_loss": 0.5726474523544312, "eval_runtime": 3.134, "eval_samples_per_second": 31.908, "eval_steps_per_second": 4.148, "step": 1525 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.7, "eval_loss": 0.5693960785865784, "eval_runtime": 3.1339, "eval_samples_per_second": 31.91, "eval_steps_per_second": 4.148, "step": 1550 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.71, "eval_loss": 0.5714474320411682, "eval_runtime": 3.132, "eval_samples_per_second": 31.928, "eval_steps_per_second": 4.151, "step": 1575 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.7, "eval_loss": 0.5551468729972839, "eval_runtime": 3.1362, "eval_samples_per_second": 31.886, "eval_steps_per_second": 4.145, "step": 1600 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.7, "eval_loss": 0.5547963380813599, "eval_runtime": 3.136, "eval_samples_per_second": 31.888, "eval_steps_per_second": 4.145, "step": 1625 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.7, "eval_loss": 0.5430014133453369, "eval_runtime": 3.1344, "eval_samples_per_second": 31.904, "eval_steps_per_second": 4.147, "step": 1650 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.71, "eval_loss": 0.5448939800262451, "eval_runtime": 3.1352, "eval_samples_per_second": 31.895, "eval_steps_per_second": 4.146, "step": 1675 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.71, "eval_loss": 0.5461122393608093, "eval_runtime": 3.134, "eval_samples_per_second": 31.908, "eval_steps_per_second": 4.148, "step": 1700 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.71, "eval_loss": 0.5440198183059692, "eval_runtime": 3.1319, "eval_samples_per_second": 31.93, "eval_steps_per_second": 4.151, "step": 1725 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.71, "eval_loss": 0.5590041875839233, "eval_runtime": 3.1357, "eval_samples_per_second": 31.891, "eval_steps_per_second": 4.146, "step": 1750 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.71, "eval_loss": 0.5390868186950684, "eval_runtime": 3.1403, "eval_samples_per_second": 31.844, "eval_steps_per_second": 4.14, "step": 1775 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.71, "eval_loss": 0.5516095757484436, "eval_runtime": 3.14, "eval_samples_per_second": 31.847, "eval_steps_per_second": 4.14, "step": 1800 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.72, "eval_loss": 0.547356128692627, "eval_runtime": 3.1285, "eval_samples_per_second": 31.964, "eval_steps_per_second": 4.155, "step": 1825 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.72, "eval_loss": 0.5476573705673218, "eval_runtime": 3.1324, "eval_samples_per_second": 31.924, "eval_steps_per_second": 4.15, "step": 1850 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.71, "eval_loss": 0.5372492074966431, "eval_runtime": 3.1315, "eval_samples_per_second": 31.933, "eval_steps_per_second": 4.151, "step": 1875 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.71, "eval_loss": 0.5445277690887451, "eval_runtime": 3.1318, "eval_samples_per_second": 31.93, "eval_steps_per_second": 4.151, "step": 1900 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.71, "eval_loss": 0.5421162247657776, "eval_runtime": 3.1315, "eval_samples_per_second": 31.933, "eval_steps_per_second": 4.151, "step": 1925 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.7, "eval_loss": 0.5376150012016296, "eval_runtime": 3.1315, "eval_samples_per_second": 31.934, "eval_steps_per_second": 4.151, "step": 1950 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.72, "eval_loss": 0.5357780456542969, "eval_runtime": 3.1264, "eval_samples_per_second": 31.986, "eval_steps_per_second": 4.158, "step": 1975 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.3108, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.72, "eval_loss": 0.5369434952735901, "eval_runtime": 3.1263, "eval_samples_per_second": 31.986, "eval_steps_per_second": 4.158, "step": 2000 }, { "best_epoch": 31, "best_eval_accuracy": 0.72, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.4507393341064453, "train_runtime": 1667.678, "train_samples_per_second": 19.188, "train_steps_per_second": 1.199 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }