{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.57, "eval_loss": 0.627579391002655, "eval_runtime": 2.9468, "eval_samples_per_second": 33.935, "eval_steps_per_second": 4.412, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.57, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.63, "eval_loss": 0.6135770678520203, "eval_runtime": 3.0105, "eval_samples_per_second": 33.217, "eval_steps_per_second": 4.318, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.63, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.66, "eval_loss": 0.6773912906646729, "eval_runtime": 3.0612, "eval_samples_per_second": 32.667, "eval_steps_per_second": 4.247, "step": 75 }, { "best_epoch": 2, "best_eval_accuracy": 0.66, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.64, "eval_loss": 0.5963658690452576, "eval_runtime": 3.096, "eval_samples_per_second": 32.3, "eval_steps_per_second": 4.199, "step": 100 }, { "best_epoch": 2, "best_eval_accuracy": 0.66, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.62, "eval_loss": 0.5315800309181213, "eval_runtime": 3.1161, "eval_samples_per_second": 32.091, "eval_steps_per_second": 4.172, "step": 125 }, { "best_epoch": 2, "best_eval_accuracy": 0.66, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.62, "eval_loss": 0.5231013298034668, "eval_runtime": 3.1314, "eval_samples_per_second": 31.935, "eval_steps_per_second": 4.151, "step": 150 }, { "best_epoch": 2, "best_eval_accuracy": 0.66, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.63, "eval_loss": 0.5155523419380188, "eval_runtime": 3.1398, "eval_samples_per_second": 31.849, "eval_steps_per_second": 4.14, "step": 175 }, { "best_epoch": 2, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.64, "eval_loss": 0.6215571761131287, "eval_runtime": 3.1438, "eval_samples_per_second": 31.809, "eval_steps_per_second": 4.135, "step": 200 }, { "best_epoch": 2, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.71, "eval_loss": 0.5012997388839722, "eval_runtime": 3.1482, "eval_samples_per_second": 31.764, "eval_steps_per_second": 4.129, "step": 225 }, { "best_epoch": 8, "best_eval_accuracy": 0.71, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.7, "eval_loss": 0.5733979940414429, "eval_runtime": 3.1518, "eval_samples_per_second": 31.728, "eval_steps_per_second": 4.125, "step": 250 }, { "best_epoch": 8, "best_eval_accuracy": 0.71, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.66, "eval_loss": 0.4682706594467163, "eval_runtime": 3.1601, "eval_samples_per_second": 31.645, "eval_steps_per_second": 4.114, "step": 275 }, { "best_epoch": 8, "best_eval_accuracy": 0.71, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.73, "eval_loss": 0.5332651734352112, "eval_runtime": 3.1621, "eval_samples_per_second": 31.624, "eval_steps_per_second": 4.111, "step": 300 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.69, "eval_loss": 0.6740408539772034, "eval_runtime": 3.1473, "eval_samples_per_second": 31.773, "eval_steps_per_second": 4.131, "step": 325 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.71, "eval_loss": 0.5184910893440247, "eval_runtime": 3.1461, "eval_samples_per_second": 31.786, "eval_steps_per_second": 4.132, "step": 350 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.71, "eval_loss": 0.5030907392501831, "eval_runtime": 3.1466, "eval_samples_per_second": 31.78, "eval_steps_per_second": 4.131, "step": 375 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.71, "eval_loss": 0.5397804379463196, "eval_runtime": 3.1405, "eval_samples_per_second": 31.843, "eval_steps_per_second": 4.14, "step": 400 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.73, "eval_loss": 0.5245677828788757, "eval_runtime": 3.1417, "eval_samples_per_second": 31.83, "eval_steps_per_second": 4.138, "step": 425 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.69, "eval_loss": 0.7413635849952698, "eval_runtime": 3.149, "eval_samples_per_second": 31.756, "eval_steps_per_second": 4.128, "step": 450 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.72, "eval_loss": 0.6816568970680237, "eval_runtime": 3.1399, "eval_samples_per_second": 31.849, "eval_steps_per_second": 4.14, "step": 475 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.015, "loss": 0.7352, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.71, "eval_loss": 0.6655824184417725, "eval_runtime": 3.1414, "eval_samples_per_second": 31.833, "eval_steps_per_second": 4.138, "step": 500 }, { "best_epoch": 11, "best_eval_accuracy": 0.73, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.76, "eval_loss": 0.5838807225227356, "eval_runtime": 3.1408, "eval_samples_per_second": 31.839, "eval_steps_per_second": 4.139, "step": 525 }, { "best_epoch": 20, "best_eval_accuracy": 0.76, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.76, "eval_loss": 0.6626308560371399, "eval_runtime": 3.1413, "eval_samples_per_second": 31.834, "eval_steps_per_second": 4.138, "step": 550 }, { "best_epoch": 20, "best_eval_accuracy": 0.76, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.75, "eval_loss": 0.5017469525337219, "eval_runtime": 3.1427, "eval_samples_per_second": 31.82, "eval_steps_per_second": 4.137, "step": 575 }, { "best_epoch": 20, "best_eval_accuracy": 0.76, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.74, "eval_loss": 0.5168166160583496, "eval_runtime": 3.1452, "eval_samples_per_second": 31.795, "eval_steps_per_second": 4.133, "step": 600 }, { "best_epoch": 20, "best_eval_accuracy": 0.76, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.78, "eval_loss": 0.5911619067192078, "eval_runtime": 3.1431, "eval_samples_per_second": 31.816, "eval_steps_per_second": 4.136, "step": 625 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.77, "eval_loss": 0.5595811605453491, "eval_runtime": 3.1433, "eval_samples_per_second": 31.813, "eval_steps_per_second": 4.136, "step": 650 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.77, "eval_loss": 0.48836490511894226, "eval_runtime": 3.1431, "eval_samples_per_second": 31.816, "eval_steps_per_second": 4.136, "step": 675 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.73, "eval_loss": 0.47384893894195557, "eval_runtime": 3.142, "eval_samples_per_second": 31.826, "eval_steps_per_second": 4.137, "step": 700 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.76, "eval_loss": 0.5052289366722107, "eval_runtime": 3.1453, "eval_samples_per_second": 31.794, "eval_steps_per_second": 4.133, "step": 725 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.74, "eval_loss": 0.6162938475608826, "eval_runtime": 3.1453, "eval_samples_per_second": 31.793, "eval_steps_per_second": 4.133, "step": 750 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.74, "eval_loss": 0.5823907256126404, "eval_runtime": 3.143, "eval_samples_per_second": 31.817, "eval_steps_per_second": 4.136, "step": 775 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.72, "eval_loss": 0.4995167553424835, "eval_runtime": 3.1421, "eval_samples_per_second": 31.826, "eval_steps_per_second": 4.137, "step": 800 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.71, "eval_loss": 0.4935573935508728, "eval_runtime": 3.1423, "eval_samples_per_second": 31.824, "eval_steps_per_second": 4.137, "step": 825 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.72, "eval_loss": 0.5463616847991943, "eval_runtime": 3.1442, "eval_samples_per_second": 31.804, "eval_steps_per_second": 4.135, "step": 850 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.74, "eval_loss": 0.5164341926574707, "eval_runtime": 3.1446, "eval_samples_per_second": 31.801, "eval_steps_per_second": 4.134, "step": 875 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.75, "eval_loss": 0.5088481307029724, "eval_runtime": 3.1452, "eval_samples_per_second": 31.795, "eval_steps_per_second": 4.133, "step": 900 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.75, "eval_loss": 0.599052906036377, "eval_runtime": 3.1458, "eval_samples_per_second": 31.788, "eval_steps_per_second": 4.132, "step": 925 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.73, "eval_loss": 0.49629107117652893, "eval_runtime": 3.1523, "eval_samples_per_second": 31.723, "eval_steps_per_second": 4.124, "step": 950 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.72, "eval_loss": 0.5085676312446594, "eval_runtime": 3.1502, "eval_samples_per_second": 31.744, "eval_steps_per_second": 4.127, "step": 975 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.01, "loss": 0.411, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.73, "eval_loss": 0.5202763080596924, "eval_runtime": 3.1439, "eval_samples_per_second": 31.808, "eval_steps_per_second": 4.135, "step": 1000 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.74, "eval_loss": 0.5843695998191833, "eval_runtime": 3.1411, "eval_samples_per_second": 31.836, "eval_steps_per_second": 4.139, "step": 1025 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.74, "eval_loss": 0.5285233855247498, "eval_runtime": 3.1423, "eval_samples_per_second": 31.824, "eval_steps_per_second": 4.137, "step": 1050 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.74, "eval_loss": 0.5552850961685181, "eval_runtime": 3.1448, "eval_samples_per_second": 31.799, "eval_steps_per_second": 4.134, "step": 1075 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.71, "eval_loss": 0.5588386058807373, "eval_runtime": 3.1454, "eval_samples_per_second": 31.793, "eval_steps_per_second": 4.133, "step": 1100 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.72, "eval_loss": 0.5391697883605957, "eval_runtime": 3.1435, "eval_samples_per_second": 31.812, "eval_steps_per_second": 4.136, "step": 1125 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.72, "eval_loss": 0.5494285225868225, "eval_runtime": 3.1433, "eval_samples_per_second": 31.814, "eval_steps_per_second": 4.136, "step": 1150 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.76, "eval_loss": 0.4982169270515442, "eval_runtime": 3.1427, "eval_samples_per_second": 31.819, "eval_steps_per_second": 4.137, "step": 1175 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.72, "eval_loss": 0.5374172329902649, "eval_runtime": 3.1407, "eval_samples_per_second": 31.84, "eval_steps_per_second": 4.139, "step": 1200 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.73, "eval_loss": 0.5730433464050293, "eval_runtime": 3.142, "eval_samples_per_second": 31.827, "eval_steps_per_second": 4.137, "step": 1225 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.72, "eval_loss": 0.51490718126297, "eval_runtime": 3.1446, "eval_samples_per_second": 31.8, "eval_steps_per_second": 4.134, "step": 1250 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.72, "eval_loss": 0.49485138058662415, "eval_runtime": 3.1403, "eval_samples_per_second": 31.844, "eval_steps_per_second": 4.14, "step": 1275 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.73, "eval_loss": 0.5295071601867676, "eval_runtime": 3.1397, "eval_samples_per_second": 31.851, "eval_steps_per_second": 4.141, "step": 1300 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.72, "eval_loss": 0.5223097801208496, "eval_runtime": 3.1423, "eval_samples_per_second": 31.824, "eval_steps_per_second": 4.137, "step": 1325 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.71, "eval_loss": 0.5616713762283325, "eval_runtime": 3.141, "eval_samples_per_second": 31.837, "eval_steps_per_second": 4.139, "step": 1350 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.72, "eval_loss": 0.5373037457466125, "eval_runtime": 3.1394, "eval_samples_per_second": 31.853, "eval_steps_per_second": 4.141, "step": 1375 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.73, "eval_loss": 0.48570069670677185, "eval_runtime": 3.1398, "eval_samples_per_second": 31.849, "eval_steps_per_second": 4.14, "step": 1400 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.72, "eval_loss": 0.49541863799095154, "eval_runtime": 3.1394, "eval_samples_per_second": 31.853, "eval_steps_per_second": 4.141, "step": 1425 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.72, "eval_loss": 0.5024493932723999, "eval_runtime": 3.1403, "eval_samples_per_second": 31.844, "eval_steps_per_second": 4.14, "step": 1450 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.74, "eval_loss": 0.497109591960907, "eval_runtime": 3.1489, "eval_samples_per_second": 31.757, "eval_steps_per_second": 4.128, "step": 1475 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.005, "loss": 0.318, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.73, "eval_loss": 0.5264896154403687, "eval_runtime": 3.1479, "eval_samples_per_second": 31.767, "eval_steps_per_second": 4.13, "step": 1500 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.71, "eval_loss": 0.4966976046562195, "eval_runtime": 3.1464, "eval_samples_per_second": 31.783, "eval_steps_per_second": 4.132, "step": 1525 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.73, "eval_loss": 0.497234046459198, "eval_runtime": 3.1465, "eval_samples_per_second": 31.782, "eval_steps_per_second": 4.132, "step": 1550 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.72, "eval_loss": 0.4908214509487152, "eval_runtime": 3.1342, "eval_samples_per_second": 31.906, "eval_steps_per_second": 4.148, "step": 1575 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.74, "eval_loss": 0.5055844783782959, "eval_runtime": 3.1392, "eval_samples_per_second": 31.855, "eval_steps_per_second": 4.141, "step": 1600 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.74, "eval_loss": 0.5230618119239807, "eval_runtime": 3.1268, "eval_samples_per_second": 31.981, "eval_steps_per_second": 4.158, "step": 1625 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.75, "eval_loss": 0.47373998165130615, "eval_runtime": 3.1252, "eval_samples_per_second": 31.998, "eval_steps_per_second": 4.16, "step": 1650 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.72, "eval_loss": 0.5015537142753601, "eval_runtime": 3.1238, "eval_samples_per_second": 32.012, "eval_steps_per_second": 4.162, "step": 1675 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.73, "eval_loss": 0.49883437156677246, "eval_runtime": 3.1204, "eval_samples_per_second": 32.047, "eval_steps_per_second": 4.166, "step": 1700 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.74, "eval_loss": 0.5276287794113159, "eval_runtime": 3.1254, "eval_samples_per_second": 31.996, "eval_steps_per_second": 4.16, "step": 1725 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.73, "eval_loss": 0.4912046790122986, "eval_runtime": 3.1233, "eval_samples_per_second": 32.018, "eval_steps_per_second": 4.162, "step": 1750 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.72, "eval_loss": 0.48646289110183716, "eval_runtime": 3.1257, "eval_samples_per_second": 31.993, "eval_steps_per_second": 4.159, "step": 1775 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.73, "eval_loss": 0.475396066904068, "eval_runtime": 3.1256, "eval_samples_per_second": 31.994, "eval_steps_per_second": 4.159, "step": 1800 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.73, "eval_loss": 0.49221473932266235, "eval_runtime": 3.1249, "eval_samples_per_second": 32.001, "eval_steps_per_second": 4.16, "step": 1825 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.74, "eval_loss": 0.4884068965911865, "eval_runtime": 3.1256, "eval_samples_per_second": 31.994, "eval_steps_per_second": 4.159, "step": 1850 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.73, "eval_loss": 0.48677849769592285, "eval_runtime": 3.1234, "eval_samples_per_second": 32.017, "eval_steps_per_second": 4.162, "step": 1875 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.73, "eval_loss": 0.48715740442276, "eval_runtime": 3.1231, "eval_samples_per_second": 32.02, "eval_steps_per_second": 4.163, "step": 1900 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.72, "eval_loss": 0.4847962260246277, "eval_runtime": 3.1233, "eval_samples_per_second": 32.017, "eval_steps_per_second": 4.162, "step": 1925 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.72, "eval_loss": 0.4922550320625305, "eval_runtime": 3.1248, "eval_samples_per_second": 32.002, "eval_steps_per_second": 4.16, "step": 1950 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.73, "eval_loss": 0.4888269007205963, "eval_runtime": 3.1211, "eval_samples_per_second": 32.04, "eval_steps_per_second": 4.165, "step": 1975 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.287, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.72, "eval_loss": 0.4912576377391815, "eval_runtime": 3.1223, "eval_samples_per_second": 32.028, "eval_steps_per_second": 4.164, "step": 2000 }, { "best_epoch": 24, "best_eval_accuracy": 0.78, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.43781012725830076, "train_runtime": 1665.2419, "train_samples_per_second": 19.216, "train_steps_per_second": 1.201 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }