{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.38, "eval_loss": 0.5778459906578064, "eval_runtime": 2.9164, "eval_samples_per_second": 34.289, "eval_steps_per_second": 4.458, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.38, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.66, "eval_loss": 0.5810088515281677, "eval_runtime": 2.9673, "eval_samples_per_second": 33.701, "eval_steps_per_second": 4.381, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.65, "eval_loss": 0.6271185874938965, "eval_runtime": 3.0283, "eval_samples_per_second": 33.022, "eval_steps_per_second": 4.293, "step": 75 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.64, "eval_loss": 0.5771670341491699, "eval_runtime": 3.068, "eval_samples_per_second": 32.595, "eval_steps_per_second": 4.237, "step": 100 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.62, "eval_loss": 0.5290189981460571, "eval_runtime": 3.0958, "eval_samples_per_second": 32.301, "eval_steps_per_second": 4.199, "step": 125 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.62, "eval_loss": 0.5351535677909851, "eval_runtime": 3.1094, "eval_samples_per_second": 32.16, "eval_steps_per_second": 4.181, "step": 150 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.61, "eval_loss": 0.5321515798568726, "eval_runtime": 3.1264, "eval_samples_per_second": 31.985, "eval_steps_per_second": 4.158, "step": 175 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.64, "eval_loss": 0.5976082682609558, "eval_runtime": 3.1371, "eval_samples_per_second": 31.876, "eval_steps_per_second": 4.144, "step": 200 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.61, "eval_loss": 0.5289967656135559, "eval_runtime": 3.1422, "eval_samples_per_second": 31.825, "eval_steps_per_second": 4.137, "step": 225 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.66, "eval_loss": 0.569958508014679, "eval_runtime": 3.1468, "eval_samples_per_second": 31.778, "eval_steps_per_second": 4.131, "step": 250 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.66, "eval_loss": 0.5132102966308594, "eval_runtime": 3.1512, "eval_samples_per_second": 31.734, "eval_steps_per_second": 4.125, "step": 275 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.64, "eval_loss": 0.5155261754989624, "eval_runtime": 3.163, "eval_samples_per_second": 31.616, "eval_steps_per_second": 4.11, "step": 300 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.67, "eval_loss": 0.5048606395721436, "eval_runtime": 3.1576, "eval_samples_per_second": 31.67, "eval_steps_per_second": 4.117, "step": 325 }, { "best_epoch": 12, "best_eval_accuracy": 0.67, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.67, "eval_loss": 0.507844865322113, "eval_runtime": 3.1514, "eval_samples_per_second": 31.732, "eval_steps_per_second": 4.125, "step": 350 }, { "best_epoch": 12, "best_eval_accuracy": 0.67, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.68, "eval_loss": 0.48210325837135315, "eval_runtime": 3.149, "eval_samples_per_second": 31.756, "eval_steps_per_second": 4.128, "step": 375 }, { "best_epoch": 14, "best_eval_accuracy": 0.68, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.7, "eval_loss": 0.5371115207672119, "eval_runtime": 3.1482, "eval_samples_per_second": 31.765, "eval_steps_per_second": 4.129, "step": 400 }, { "best_epoch": 15, "best_eval_accuracy": 0.7, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.69, "eval_loss": 0.5406803488731384, "eval_runtime": 3.147, "eval_samples_per_second": 31.776, "eval_steps_per_second": 4.131, "step": 425 }, { "best_epoch": 15, "best_eval_accuracy": 0.7, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.71, "eval_loss": 0.6440941095352173, "eval_runtime": 3.1467, "eval_samples_per_second": 31.779, "eval_steps_per_second": 4.131, "step": 450 }, { "best_epoch": 17, "best_eval_accuracy": 0.71, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.7, "eval_loss": 0.5787262916564941, "eval_runtime": 3.1434, "eval_samples_per_second": 31.812, "eval_steps_per_second": 4.136, "step": 475 }, { "best_epoch": 17, "best_eval_accuracy": 0.71, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.0075, "loss": 0.6402, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.68, "eval_loss": 0.5646016597747803, "eval_runtime": 3.1464, "eval_samples_per_second": 31.782, "eval_steps_per_second": 4.132, "step": 500 }, { "best_epoch": 17, "best_eval_accuracy": 0.71, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.71, "eval_loss": 0.5552840828895569, "eval_runtime": 3.1468, "eval_samples_per_second": 31.779, "eval_steps_per_second": 4.131, "step": 525 }, { "best_epoch": 17, "best_eval_accuracy": 0.71, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.72, "eval_loss": 0.613720178604126, "eval_runtime": 3.1448, "eval_samples_per_second": 31.799, "eval_steps_per_second": 4.134, "step": 550 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.71, "eval_loss": 0.49484309554100037, "eval_runtime": 3.146, "eval_samples_per_second": 31.786, "eval_steps_per_second": 4.132, "step": 575 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.72, "eval_loss": 0.550967812538147, "eval_runtime": 3.1467, "eval_samples_per_second": 31.78, "eval_steps_per_second": 4.131, "step": 600 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.7, "eval_loss": 0.5984699130058289, "eval_runtime": 3.146, "eval_samples_per_second": 31.786, "eval_steps_per_second": 4.132, "step": 625 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.71, "eval_loss": 0.566030740737915, "eval_runtime": 3.1483, "eval_samples_per_second": 31.763, "eval_steps_per_second": 4.129, "step": 650 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.71, "eval_loss": 0.523222029209137, "eval_runtime": 3.1485, "eval_samples_per_second": 31.762, "eval_steps_per_second": 4.129, "step": 675 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.71, "eval_loss": 0.5381032824516296, "eval_runtime": 3.1492, "eval_samples_per_second": 31.754, "eval_steps_per_second": 4.128, "step": 700 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.71, "eval_loss": 0.5234293937683105, "eval_runtime": 3.147, "eval_samples_per_second": 31.776, "eval_steps_per_second": 4.131, "step": 725 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.71, "eval_loss": 0.6145434975624084, "eval_runtime": 3.1487, "eval_samples_per_second": 31.759, "eval_steps_per_second": 4.129, "step": 750 }, { "best_epoch": 21, "best_eval_accuracy": 0.72, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.73, "eval_loss": 0.5482304096221924, "eval_runtime": 3.1469, "eval_samples_per_second": 31.777, "eval_steps_per_second": 4.131, "step": 775 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.72, "eval_loss": 0.5246371626853943, "eval_runtime": 3.1482, "eval_samples_per_second": 31.764, "eval_steps_per_second": 4.129, "step": 800 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.71, "eval_loss": 0.5257811546325684, "eval_runtime": 3.1597, "eval_samples_per_second": 31.649, "eval_steps_per_second": 4.114, "step": 825 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.72, "eval_loss": 0.5278013348579407, "eval_runtime": 3.1476, "eval_samples_per_second": 31.771, "eval_steps_per_second": 4.13, "step": 850 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.72, "eval_loss": 0.5244772434234619, "eval_runtime": 3.1478, "eval_samples_per_second": 31.769, "eval_steps_per_second": 4.13, "step": 875 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.72, "eval_loss": 0.5072507262229919, "eval_runtime": 3.1456, "eval_samples_per_second": 31.79, "eval_steps_per_second": 4.133, "step": 900 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.72, "eval_loss": 0.4982739984989166, "eval_runtime": 3.1457, "eval_samples_per_second": 31.79, "eval_steps_per_second": 4.133, "step": 925 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.73, "eval_loss": 0.5076562166213989, "eval_runtime": 3.147, "eval_samples_per_second": 31.776, "eval_steps_per_second": 4.131, "step": 950 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.73, "eval_loss": 0.5262712240219116, "eval_runtime": 3.1494, "eval_samples_per_second": 31.752, "eval_steps_per_second": 4.128, "step": 975 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.005, "loss": 0.3719, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.73, "eval_loss": 0.5096136331558228, "eval_runtime": 3.1456, "eval_samples_per_second": 31.79, "eval_steps_per_second": 4.133, "step": 1000 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.73, "eval_loss": 0.5338824391365051, "eval_runtime": 3.1496, "eval_samples_per_second": 31.75, "eval_steps_per_second": 4.128, "step": 1025 }, { "best_epoch": 30, "best_eval_accuracy": 0.73, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.75, "eval_loss": 0.4963531792163849, "eval_runtime": 3.1494, "eval_samples_per_second": 31.752, "eval_steps_per_second": 4.128, "step": 1050 }, { "best_epoch": 41, "best_eval_accuracy": 0.75, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.73, "eval_loss": 0.4831782877445221, "eval_runtime": 3.1482, "eval_samples_per_second": 31.764, "eval_steps_per_second": 4.129, "step": 1075 }, { "best_epoch": 41, "best_eval_accuracy": 0.75, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.73, "eval_loss": 0.49397972226142883, "eval_runtime": 3.1476, "eval_samples_per_second": 31.77, "eval_steps_per_second": 4.13, "step": 1100 }, { "best_epoch": 41, "best_eval_accuracy": 0.75, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.72, "eval_loss": 0.49817508459091187, "eval_runtime": 3.1507, "eval_samples_per_second": 31.739, "eval_steps_per_second": 4.126, "step": 1125 }, { "best_epoch": 41, "best_eval_accuracy": 0.75, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.73, "eval_loss": 0.5449243187904358, "eval_runtime": 3.1528, "eval_samples_per_second": 31.718, "eval_steps_per_second": 4.123, "step": 1150 }, { "best_epoch": 41, "best_eval_accuracy": 0.75, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.73, "eval_loss": 0.5174975395202637, "eval_runtime": 3.1533, "eval_samples_per_second": 31.713, "eval_steps_per_second": 4.123, "step": 1175 }, { "best_epoch": 41, "best_eval_accuracy": 0.75, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.74, "eval_loss": 0.5207518339157104, "eval_runtime": 3.1444, "eval_samples_per_second": 31.803, "eval_steps_per_second": 4.134, "step": 1200 }, { "best_epoch": 41, "best_eval_accuracy": 0.75, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.74, "eval_loss": 0.5281075835227966, "eval_runtime": 3.1471, "eval_samples_per_second": 31.775, "eval_steps_per_second": 4.131, "step": 1225 }, { "best_epoch": 41, "best_eval_accuracy": 0.75, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.76, "eval_loss": 0.4939603805541992, "eval_runtime": 3.1477, "eval_samples_per_second": 31.77, "eval_steps_per_second": 4.13, "step": 1250 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.74, "eval_loss": 0.5019869208335876, "eval_runtime": 3.1472, "eval_samples_per_second": 31.774, "eval_steps_per_second": 4.131, "step": 1275 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.74, "eval_loss": 0.5010063052177429, "eval_runtime": 3.1485, "eval_samples_per_second": 31.761, "eval_steps_per_second": 4.129, "step": 1300 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.73, "eval_loss": 0.4799458384513855, "eval_runtime": 3.1487, "eval_samples_per_second": 31.759, "eval_steps_per_second": 4.129, "step": 1325 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.74, "eval_loss": 0.5205541253089905, "eval_runtime": 3.1466, "eval_samples_per_second": 31.78, "eval_steps_per_second": 4.131, "step": 1350 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.75, "eval_loss": 0.5148220062255859, "eval_runtime": 3.1506, "eval_samples_per_second": 31.74, "eval_steps_per_second": 4.126, "step": 1375 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.74, "eval_loss": 0.481507271528244, "eval_runtime": 3.1452, "eval_samples_per_second": 31.795, "eval_steps_per_second": 4.133, "step": 1400 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.74, "eval_loss": 0.4951218068599701, "eval_runtime": 3.1457, "eval_samples_per_second": 31.789, "eval_steps_per_second": 4.133, "step": 1425 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.74, "eval_loss": 0.507718563079834, "eval_runtime": 3.145, "eval_samples_per_second": 31.797, "eval_steps_per_second": 4.134, "step": 1450 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.74, "eval_loss": 0.5000278353691101, "eval_runtime": 3.146, "eval_samples_per_second": 31.786, "eval_steps_per_second": 4.132, "step": 1475 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.0025, "loss": 0.3121, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.75, "eval_loss": 0.5124149918556213, "eval_runtime": 3.1487, "eval_samples_per_second": 31.759, "eval_steps_per_second": 4.129, "step": 1500 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.76, "eval_loss": 0.4891398549079895, "eval_runtime": 3.1463, "eval_samples_per_second": 31.784, "eval_steps_per_second": 4.132, "step": 1525 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.75, "eval_loss": 0.4993504583835602, "eval_runtime": 3.1451, "eval_samples_per_second": 31.795, "eval_steps_per_second": 4.133, "step": 1550 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.75, "eval_loss": 0.49470365047454834, "eval_runtime": 3.1452, "eval_samples_per_second": 31.794, "eval_steps_per_second": 4.133, "step": 1575 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.74, "eval_loss": 0.4832766652107239, "eval_runtime": 3.1469, "eval_samples_per_second": 31.777, "eval_steps_per_second": 4.131, "step": 1600 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.75, "eval_loss": 0.5135172605514526, "eval_runtime": 3.1462, "eval_samples_per_second": 31.785, "eval_steps_per_second": 4.132, "step": 1625 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.75, "eval_loss": 0.4803304374217987, "eval_runtime": 3.1472, "eval_samples_per_second": 31.775, "eval_steps_per_second": 4.131, "step": 1650 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.75, "eval_loss": 0.5058041214942932, "eval_runtime": 3.1437, "eval_samples_per_second": 31.81, "eval_steps_per_second": 4.135, "step": 1675 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.75, "eval_loss": 0.48404258489608765, "eval_runtime": 3.1474, "eval_samples_per_second": 31.772, "eval_steps_per_second": 4.13, "step": 1700 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.75, "eval_loss": 0.5051174759864807, "eval_runtime": 3.1517, "eval_samples_per_second": 31.728, "eval_steps_per_second": 4.125, "step": 1725 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.74, "eval_loss": 0.4883221387863159, "eval_runtime": 3.1453, "eval_samples_per_second": 31.794, "eval_steps_per_second": 4.133, "step": 1750 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.74, "eval_loss": 0.497234046459198, "eval_runtime": 3.146, "eval_samples_per_second": 31.786, "eval_steps_per_second": 4.132, "step": 1775 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.74, "eval_loss": 0.4789247214794159, "eval_runtime": 3.1455, "eval_samples_per_second": 31.791, "eval_steps_per_second": 4.133, "step": 1800 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.74, "eval_loss": 0.49844783544540405, "eval_runtime": 3.1428, "eval_samples_per_second": 31.818, "eval_steps_per_second": 4.136, "step": 1825 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.74, "eval_loss": 0.4913429021835327, "eval_runtime": 3.1433, "eval_samples_per_second": 31.813, "eval_steps_per_second": 4.136, "step": 1850 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.74, "eval_loss": 0.4879249632358551, "eval_runtime": 3.1452, "eval_samples_per_second": 31.794, "eval_steps_per_second": 4.133, "step": 1875 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.74, "eval_loss": 0.4901810884475708, "eval_runtime": 3.1415, "eval_samples_per_second": 31.832, "eval_steps_per_second": 4.138, "step": 1900 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.74, "eval_loss": 0.48560553789138794, "eval_runtime": 3.1442, "eval_samples_per_second": 31.804, "eval_steps_per_second": 4.135, "step": 1925 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.74, "eval_loss": 0.48929107189178467, "eval_runtime": 3.1449, "eval_samples_per_second": 31.797, "eval_steps_per_second": 4.134, "step": 1950 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.75, "eval_loss": 0.4907485842704773, "eval_runtime": 3.1529, "eval_samples_per_second": 31.717, "eval_steps_per_second": 4.123, "step": 1975 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.2906, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.75, "eval_loss": 0.4898410737514496, "eval_runtime": 3.1539, "eval_samples_per_second": 31.707, "eval_steps_per_second": 4.122, "step": 2000 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.4037043533325195, "train_runtime": 1668.0695, "train_samples_per_second": 19.184, "train_steps_per_second": 1.199 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }