{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.66, "eval_loss": 0.5415894389152527, "eval_runtime": 2.9244, "eval_samples_per_second": 34.195, "eval_steps_per_second": 4.445, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.64, "eval_loss": 0.5393697023391724, "eval_runtime": 2.9776, "eval_samples_per_second": 33.584, "eval_steps_per_second": 4.366, "step": 50 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.65, "eval_loss": 0.5375608801841736, "eval_runtime": 3.0286, "eval_samples_per_second": 33.018, "eval_steps_per_second": 4.292, "step": 75 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.65, "eval_loss": 0.5475878119468689, "eval_runtime": 3.06, "eval_samples_per_second": 32.68, "eval_steps_per_second": 4.248, "step": 100 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.64, "eval_loss": 0.5370528101921082, "eval_runtime": 3.0811, "eval_samples_per_second": 32.456, "eval_steps_per_second": 4.219, "step": 125 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.63, "eval_loss": 0.5441551208496094, "eval_runtime": 3.0936, "eval_samples_per_second": 32.324, "eval_steps_per_second": 4.202, "step": 150 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.65, "eval_loss": 0.5412868857383728, "eval_runtime": 3.1033, "eval_samples_per_second": 32.224, "eval_steps_per_second": 4.189, "step": 175 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.65, "eval_loss": 0.5381051898002625, "eval_runtime": 3.1116, "eval_samples_per_second": 32.138, "eval_steps_per_second": 4.178, "step": 200 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.65, "eval_loss": 0.5366445183753967, "eval_runtime": 3.1179, "eval_samples_per_second": 32.073, "eval_steps_per_second": 4.17, "step": 225 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.65, "eval_loss": 0.5402125120162964, "eval_runtime": 3.1204, "eval_samples_per_second": 32.047, "eval_steps_per_second": 4.166, "step": 250 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.65, "eval_loss": 0.5405026078224182, "eval_runtime": 3.1198, "eval_samples_per_second": 32.053, "eval_steps_per_second": 4.167, "step": 275 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.65, "eval_loss": 0.5396003127098083, "eval_runtime": 3.1242, "eval_samples_per_second": 32.009, "eval_steps_per_second": 4.161, "step": 300 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.66, "eval_loss": 0.5378925204277039, "eval_runtime": 3.1244, "eval_samples_per_second": 32.006, "eval_steps_per_second": 4.161, "step": 325 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.66, "eval_loss": 0.5375378727912903, "eval_runtime": 3.1264, "eval_samples_per_second": 31.985, "eval_steps_per_second": 4.158, "step": 350 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.65, "eval_loss": 0.5392658114433289, "eval_runtime": 3.1259, "eval_samples_per_second": 31.991, "eval_steps_per_second": 4.159, "step": 375 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.66, "eval_loss": 0.5370680689811707, "eval_runtime": 3.1284, "eval_samples_per_second": 31.965, "eval_steps_per_second": 4.155, "step": 400 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.66, "eval_loss": 0.5286482572555542, "eval_runtime": 3.128, "eval_samples_per_second": 31.969, "eval_steps_per_second": 4.156, "step": 425 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.65, "eval_loss": 0.5312860608100891, "eval_runtime": 3.1331, "eval_samples_per_second": 31.917, "eval_steps_per_second": 4.149, "step": 450 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.62, "eval_loss": 0.5426998734474182, "eval_runtime": 3.1294, "eval_samples_per_second": 31.955, "eval_steps_per_second": 4.154, "step": 475 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.00075, "loss": 0.616, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.63, "eval_loss": 0.5469082593917847, "eval_runtime": 3.1318, "eval_samples_per_second": 31.931, "eval_steps_per_second": 4.151, "step": 500 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.65, "eval_loss": 0.5347804427146912, "eval_runtime": 3.1345, "eval_samples_per_second": 31.903, "eval_steps_per_second": 4.147, "step": 525 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.64, "eval_loss": 0.5352023243904114, "eval_runtime": 3.1312, "eval_samples_per_second": 31.936, "eval_steps_per_second": 4.152, "step": 550 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.63, "eval_loss": 0.5434411764144897, "eval_runtime": 3.1342, "eval_samples_per_second": 31.906, "eval_steps_per_second": 4.148, "step": 575 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.62, "eval_loss": 0.543671190738678, "eval_runtime": 3.1348, "eval_samples_per_second": 31.9, "eval_steps_per_second": 4.147, "step": 600 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.65, "eval_loss": 0.5343627333641052, "eval_runtime": 3.1317, "eval_samples_per_second": 31.932, "eval_steps_per_second": 4.151, "step": 625 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.66, "eval_loss": 0.5343685746192932, "eval_runtime": 3.132, "eval_samples_per_second": 31.929, "eval_steps_per_second": 4.151, "step": 650 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.66, "eval_loss": 0.5318924188613892, "eval_runtime": 3.1314, "eval_samples_per_second": 31.934, "eval_steps_per_second": 4.151, "step": 675 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.66, "eval_loss": 0.5328655242919922, "eval_runtime": 3.1348, "eval_samples_per_second": 31.9, "eval_steps_per_second": 4.147, "step": 700 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.66, "eval_loss": 0.5312965512275696, "eval_runtime": 3.1312, "eval_samples_per_second": 31.937, "eval_steps_per_second": 4.152, "step": 725 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.66, "eval_loss": 0.5321267247200012, "eval_runtime": 3.1321, "eval_samples_per_second": 31.927, "eval_steps_per_second": 4.151, "step": 750 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.65, "eval_loss": 0.5341834425926208, "eval_runtime": 3.1318, "eval_samples_per_second": 31.93, "eval_steps_per_second": 4.151, "step": 775 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.66, "eval_loss": 0.5363546013832092, "eval_runtime": 3.131, "eval_samples_per_second": 31.938, "eval_steps_per_second": 4.152, "step": 800 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.65, "eval_loss": 0.5350150465965271, "eval_runtime": 3.1305, "eval_samples_per_second": 31.944, "eval_steps_per_second": 4.153, "step": 825 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.65, "eval_loss": 0.5382318496704102, "eval_runtime": 3.1297, "eval_samples_per_second": 31.952, "eval_steps_per_second": 4.154, "step": 850 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.65, "eval_loss": 0.5329770445823669, "eval_runtime": 3.1308, "eval_samples_per_second": 31.941, "eval_steps_per_second": 4.152, "step": 875 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.64, "eval_loss": 0.5360609292984009, "eval_runtime": 3.129, "eval_samples_per_second": 31.959, "eval_steps_per_second": 4.155, "step": 900 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.63, "eval_loss": 0.5378895998001099, "eval_runtime": 3.1316, "eval_samples_per_second": 31.932, "eval_steps_per_second": 4.151, "step": 925 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.64, "eval_loss": 0.5313640236854553, "eval_runtime": 3.1315, "eval_samples_per_second": 31.933, "eval_steps_per_second": 4.151, "step": 950 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.65, "eval_loss": 0.5307939648628235, "eval_runtime": 3.1306, "eval_samples_per_second": 31.943, "eval_steps_per_second": 4.153, "step": 975 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.0005, "loss": 0.6054, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.65, "eval_loss": 0.5347521901130676, "eval_runtime": 3.132, "eval_samples_per_second": 31.929, "eval_steps_per_second": 4.151, "step": 1000 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.64, "eval_loss": 0.5373522043228149, "eval_runtime": 3.1333, "eval_samples_per_second": 31.915, "eval_steps_per_second": 4.149, "step": 1025 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.64, "eval_loss": 0.5363101363182068, "eval_runtime": 3.1311, "eval_samples_per_second": 31.938, "eval_steps_per_second": 4.152, "step": 1050 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.64, "eval_loss": 0.5361413359642029, "eval_runtime": 3.1442, "eval_samples_per_second": 31.804, "eval_steps_per_second": 4.135, "step": 1075 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.65, "eval_loss": 0.5333101153373718, "eval_runtime": 3.1351, "eval_samples_per_second": 31.896, "eval_steps_per_second": 4.147, "step": 1100 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.65, "eval_loss": 0.5345572829246521, "eval_runtime": 3.131, "eval_samples_per_second": 31.939, "eval_steps_per_second": 4.152, "step": 1125 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.65, "eval_loss": 0.5354093313217163, "eval_runtime": 3.132, "eval_samples_per_second": 31.929, "eval_steps_per_second": 4.151, "step": 1150 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.64, "eval_loss": 0.5338207483291626, "eval_runtime": 3.1317, "eval_samples_per_second": 31.931, "eval_steps_per_second": 4.151, "step": 1175 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.65, "eval_loss": 0.5332033634185791, "eval_runtime": 3.1333, "eval_samples_per_second": 31.915, "eval_steps_per_second": 4.149, "step": 1200 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.65, "eval_loss": 0.5333837270736694, "eval_runtime": 3.1309, "eval_samples_per_second": 31.94, "eval_steps_per_second": 4.152, "step": 1225 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.65, "eval_loss": 0.5361276268959045, "eval_runtime": 3.1288, "eval_samples_per_second": 31.962, "eval_steps_per_second": 4.155, "step": 1250 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.65, "eval_loss": 0.5311057567596436, "eval_runtime": 3.1314, "eval_samples_per_second": 31.935, "eval_steps_per_second": 4.152, "step": 1275 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.66, "eval_loss": 0.5332194566726685, "eval_runtime": 3.1329, "eval_samples_per_second": 31.919, "eval_steps_per_second": 4.149, "step": 1300 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.65, "eval_loss": 0.5312312841415405, "eval_runtime": 3.1314, "eval_samples_per_second": 31.935, "eval_steps_per_second": 4.152, "step": 1325 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.65, "eval_loss": 0.5333719253540039, "eval_runtime": 3.1308, "eval_samples_per_second": 31.941, "eval_steps_per_second": 4.152, "step": 1350 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.66, "eval_loss": 0.5306155681610107, "eval_runtime": 3.1331, "eval_samples_per_second": 31.917, "eval_steps_per_second": 4.149, "step": 1375 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.65, "eval_loss": 0.5325944423675537, "eval_runtime": 3.1292, "eval_samples_per_second": 31.957, "eval_steps_per_second": 4.154, "step": 1400 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.65, "eval_loss": 0.5336495041847229, "eval_runtime": 3.1276, "eval_samples_per_second": 31.973, "eval_steps_per_second": 4.157, "step": 1425 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.65, "eval_loss": 0.5360873937606812, "eval_runtime": 3.1422, "eval_samples_per_second": 31.825, "eval_steps_per_second": 4.137, "step": 1450 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.63, "eval_loss": 0.5358687043190002, "eval_runtime": 3.1436, "eval_samples_per_second": 31.811, "eval_steps_per_second": 4.135, "step": 1475 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.00025, "loss": 0.5996, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.65, "eval_loss": 0.5341857671737671, "eval_runtime": 3.1334, "eval_samples_per_second": 31.914, "eval_steps_per_second": 4.149, "step": 1500 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.66, "eval_loss": 0.5345585942268372, "eval_runtime": 3.1431, "eval_samples_per_second": 31.816, "eval_steps_per_second": 4.136, "step": 1525 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.64, "eval_loss": 0.5333269238471985, "eval_runtime": 3.1337, "eval_samples_per_second": 31.912, "eval_steps_per_second": 4.149, "step": 1550 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.65, "eval_loss": 0.5321747660636902, "eval_runtime": 3.1351, "eval_samples_per_second": 31.896, "eval_steps_per_second": 4.147, "step": 1575 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.65, "eval_loss": 0.5307139754295349, "eval_runtime": 3.1326, "eval_samples_per_second": 31.923, "eval_steps_per_second": 4.15, "step": 1600 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.65, "eval_loss": 0.5297897458076477, "eval_runtime": 3.1367, "eval_samples_per_second": 31.881, "eval_steps_per_second": 4.144, "step": 1625 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.65, "eval_loss": 0.5299915671348572, "eval_runtime": 3.1344, "eval_samples_per_second": 31.904, "eval_steps_per_second": 4.147, "step": 1650 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.65, "eval_loss": 0.5306195616722107, "eval_runtime": 3.1331, "eval_samples_per_second": 31.917, "eval_steps_per_second": 4.149, "step": 1675 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.65, "eval_loss": 0.5310782790184021, "eval_runtime": 3.135, "eval_samples_per_second": 31.898, "eval_steps_per_second": 4.147, "step": 1700 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.65, "eval_loss": 0.5317740440368652, "eval_runtime": 3.1334, "eval_samples_per_second": 31.914, "eval_steps_per_second": 4.149, "step": 1725 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.65, "eval_loss": 0.5319586396217346, "eval_runtime": 3.1355, "eval_samples_per_second": 31.892, "eval_steps_per_second": 4.146, "step": 1750 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.65, "eval_loss": 0.5320259928703308, "eval_runtime": 3.1356, "eval_samples_per_second": 31.892, "eval_steps_per_second": 4.146, "step": 1775 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.65, "eval_loss": 0.5309186577796936, "eval_runtime": 3.1334, "eval_samples_per_second": 31.914, "eval_steps_per_second": 4.149, "step": 1800 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.65, "eval_loss": 0.5307186245918274, "eval_runtime": 3.1346, "eval_samples_per_second": 31.902, "eval_steps_per_second": 4.147, "step": 1825 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.65, "eval_loss": 0.5306009650230408, "eval_runtime": 3.135, "eval_samples_per_second": 31.898, "eval_steps_per_second": 4.147, "step": 1850 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.65, "eval_loss": 0.5314067006111145, "eval_runtime": 3.1521, "eval_samples_per_second": 31.725, "eval_steps_per_second": 4.124, "step": 1875 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.65, "eval_loss": 0.5311477184295654, "eval_runtime": 3.1462, "eval_samples_per_second": 31.785, "eval_steps_per_second": 4.132, "step": 1900 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.65, "eval_loss": 0.5311266779899597, "eval_runtime": 3.1309, "eval_samples_per_second": 31.94, "eval_steps_per_second": 4.152, "step": 1925 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.65, "eval_loss": 0.53106290102005, "eval_runtime": 3.1356, "eval_samples_per_second": 31.892, "eval_steps_per_second": 4.146, "step": 1950 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.65, "eval_loss": 0.5310864448547363, "eval_runtime": 3.1316, "eval_samples_per_second": 31.932, "eval_steps_per_second": 4.151, "step": 1975 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.596, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.65, "eval_loss": 0.5310752391815186, "eval_runtime": 3.1362, "eval_samples_per_second": 31.886, "eval_steps_per_second": 4.145, "step": 2000 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.6042405700683594, "train_runtime": 1661.2119, "train_samples_per_second": 19.263, "train_steps_per_second": 1.204 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }