{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6, "eval_loss": 0.7348268628120422, "eval_runtime": 2.9006, "eval_samples_per_second": 34.475, "eval_steps_per_second": 4.482, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.6, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.61, "eval_loss": 0.6044729948043823, "eval_runtime": 2.9185, "eval_samples_per_second": 34.264, "eval_steps_per_second": 4.454, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.62, "eval_loss": 0.9239120483398438, "eval_runtime": 2.9442, "eval_samples_per_second": 33.965, "eval_steps_per_second": 4.415, "step": 75 }, { "best_epoch": 2, "best_eval_accuracy": 0.62, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.69, "eval_loss": 0.6379037499427795, "eval_runtime": 2.9811, "eval_samples_per_second": 33.545, "eval_steps_per_second": 4.361, "step": 100 }, { "best_epoch": 3, "best_eval_accuracy": 0.69, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.72, "eval_loss": 0.5724024176597595, "eval_runtime": 2.9944, "eval_samples_per_second": 33.396, "eval_steps_per_second": 4.341, "step": 125 }, { "best_epoch": 4, "best_eval_accuracy": 0.72, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.69, "eval_loss": 1.208309292793274, "eval_runtime": 3.0023, "eval_samples_per_second": 33.308, "eval_steps_per_second": 4.33, "step": 150 }, { "best_epoch": 4, "best_eval_accuracy": 0.72, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.67, "eval_loss": 1.3074427843093872, "eval_runtime": 3.0095, "eval_samples_per_second": 33.228, "eval_steps_per_second": 4.32, "step": 175 }, { "best_epoch": 4, "best_eval_accuracy": 0.72, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.7, "eval_loss": 1.1625832319259644, "eval_runtime": 3.0155, "eval_samples_per_second": 33.162, "eval_steps_per_second": 4.311, "step": 200 }, { "best_epoch": 4, "best_eval_accuracy": 0.72, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.64, "eval_loss": 1.0019404888153076, "eval_runtime": 3.0171, "eval_samples_per_second": 33.144, "eval_steps_per_second": 4.309, "step": 225 }, { "best_epoch": 4, "best_eval_accuracy": 0.72, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.73, "eval_loss": 0.6239729523658752, "eval_runtime": 3.019, "eval_samples_per_second": 33.124, "eval_steps_per_second": 4.306, "step": 250 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.66, "eval_loss": 1.0828949213027954, "eval_runtime": 3.0193, "eval_samples_per_second": 33.12, "eval_steps_per_second": 4.306, "step": 275 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.66, "eval_loss": 0.8052525520324707, "eval_runtime": 3.0244, "eval_samples_per_second": 33.064, "eval_steps_per_second": 4.298, "step": 300 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.63, "eval_loss": 1.152608871459961, "eval_runtime": 3.0194, "eval_samples_per_second": 33.119, "eval_steps_per_second": 4.305, "step": 325 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.69, "eval_loss": 1.2005667686462402, "eval_runtime": 3.0191, "eval_samples_per_second": 33.122, "eval_steps_per_second": 4.306, "step": 350 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.67, "eval_loss": 1.1382259130477905, "eval_runtime": 3.0159, "eval_samples_per_second": 33.157, "eval_steps_per_second": 4.31, "step": 375 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.71, "eval_loss": 1.134488821029663, "eval_runtime": 3.0167, "eval_samples_per_second": 33.148, "eval_steps_per_second": 4.309, "step": 400 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.67, "eval_loss": 1.5028630495071411, "eval_runtime": 3.0173, "eval_samples_per_second": 33.142, "eval_steps_per_second": 4.308, "step": 425 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.67, "eval_loss": 1.3780053853988647, "eval_runtime": 3.0177, "eval_samples_per_second": 33.138, "eval_steps_per_second": 4.308, "step": 450 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.66, "eval_loss": 1.1811206340789795, "eval_runtime": 3.0193, "eval_samples_per_second": 33.12, "eval_steps_per_second": 4.306, "step": 475 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.037500000000000006, "loss": 1.3151, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.7, "eval_loss": 1.246147871017456, "eval_runtime": 3.0186, "eval_samples_per_second": 33.128, "eval_steps_per_second": 4.307, "step": 500 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.68, "eval_loss": 1.2269139289855957, "eval_runtime": 3.0181, "eval_samples_per_second": 33.133, "eval_steps_per_second": 4.307, "step": 525 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.68, "eval_loss": 1.151503324508667, "eval_runtime": 3.018, "eval_samples_per_second": 33.135, "eval_steps_per_second": 4.307, "step": 550 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.66, "eval_loss": 0.9943810105323792, "eval_runtime": 3.0183, "eval_samples_per_second": 33.131, "eval_steps_per_second": 4.307, "step": 575 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.67, "eval_loss": 1.2708048820495605, "eval_runtime": 3.02, "eval_samples_per_second": 33.112, "eval_steps_per_second": 4.305, "step": 600 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.65, "eval_loss": 1.5816699266433716, "eval_runtime": 3.0182, "eval_samples_per_second": 33.132, "eval_steps_per_second": 4.307, "step": 625 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.71, "eval_loss": 1.093419075012207, "eval_runtime": 3.0169, "eval_samples_per_second": 33.147, "eval_steps_per_second": 4.309, "step": 650 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.67, "eval_loss": 1.4179078340530396, "eval_runtime": 3.017, "eval_samples_per_second": 33.146, "eval_steps_per_second": 4.309, "step": 675 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.65, "eval_loss": 1.4260410070419312, "eval_runtime": 3.0174, "eval_samples_per_second": 33.141, "eval_steps_per_second": 4.308, "step": 700 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.65, "eval_loss": 1.381821870803833, "eval_runtime": 3.0154, "eval_samples_per_second": 33.163, "eval_steps_per_second": 4.311, "step": 725 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.66, "eval_loss": 1.7165961265563965, "eval_runtime": 3.0194, "eval_samples_per_second": 33.119, "eval_steps_per_second": 4.305, "step": 750 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.64, "eval_loss": 1.1709787845611572, "eval_runtime": 3.0169, "eval_samples_per_second": 33.147, "eval_steps_per_second": 4.309, "step": 775 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.64, "eval_loss": 1.0659561157226562, "eval_runtime": 3.0169, "eval_samples_per_second": 33.146, "eval_steps_per_second": 4.309, "step": 800 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.69, "eval_loss": 1.012652039527893, "eval_runtime": 3.0164, "eval_samples_per_second": 33.153, "eval_steps_per_second": 4.31, "step": 825 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.68, "eval_loss": 0.9809643030166626, "eval_runtime": 3.0193, "eval_samples_per_second": 33.121, "eval_steps_per_second": 4.306, "step": 850 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.7, "eval_loss": 1.1077015399932861, "eval_runtime": 3.0155, "eval_samples_per_second": 33.162, "eval_steps_per_second": 4.311, "step": 875 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.66, "eval_loss": 1.0629407167434692, "eval_runtime": 3.0168, "eval_samples_per_second": 33.147, "eval_steps_per_second": 4.309, "step": 900 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.69, "eval_loss": 1.5933445692062378, "eval_runtime": 3.035, "eval_samples_per_second": 32.949, "eval_steps_per_second": 4.283, "step": 925 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.71, "eval_loss": 1.1321765184402466, "eval_runtime": 3.0282, "eval_samples_per_second": 33.023, "eval_steps_per_second": 4.293, "step": 950 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.73, "eval_loss": 1.0734666585922241, "eval_runtime": 3.0186, "eval_samples_per_second": 33.128, "eval_steps_per_second": 4.307, "step": 975 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.025, "loss": 0.6791, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.72, "eval_loss": 0.894046425819397, "eval_runtime": 3.0277, "eval_samples_per_second": 33.028, "eval_steps_per_second": 4.294, "step": 1000 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.67, "eval_loss": 0.9348558783531189, "eval_runtime": 3.0186, "eval_samples_per_second": 33.128, "eval_steps_per_second": 4.307, "step": 1025 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.67, "eval_loss": 0.8961507678031921, "eval_runtime": 3.0195, "eval_samples_per_second": 33.118, "eval_steps_per_second": 4.305, "step": 1050 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.69, "eval_loss": 1.0662699937820435, "eval_runtime": 3.0298, "eval_samples_per_second": 33.006, "eval_steps_per_second": 4.291, "step": 1075 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.69, "eval_loss": 0.9680778980255127, "eval_runtime": 3.0292, "eval_samples_per_second": 33.012, "eval_steps_per_second": 4.292, "step": 1100 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.68, "eval_loss": 0.7694079875946045, "eval_runtime": 3.0181, "eval_samples_per_second": 33.134, "eval_steps_per_second": 4.307, "step": 1125 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.71, "eval_loss": 1.0310795307159424, "eval_runtime": 3.0271, "eval_samples_per_second": 33.035, "eval_steps_per_second": 4.295, "step": 1150 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.7, "eval_loss": 0.7406817674636841, "eval_runtime": 3.0217, "eval_samples_per_second": 33.094, "eval_steps_per_second": 4.302, "step": 1175 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.69, "eval_loss": 0.686066210269928, "eval_runtime": 3.0204, "eval_samples_per_second": 33.108, "eval_steps_per_second": 4.304, "step": 1200 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.69, "eval_loss": 0.9919649362564087, "eval_runtime": 3.0244, "eval_samples_per_second": 33.064, "eval_steps_per_second": 4.298, "step": 1225 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.69, "eval_loss": 0.7187044620513916, "eval_runtime": 3.0245, "eval_samples_per_second": 33.064, "eval_steps_per_second": 4.298, "step": 1250 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.72, "eval_loss": 0.7602307200431824, "eval_runtime": 3.0264, "eval_samples_per_second": 33.043, "eval_steps_per_second": 4.296, "step": 1275 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.69, "eval_loss": 0.728481650352478, "eval_runtime": 3.0237, "eval_samples_per_second": 33.072, "eval_steps_per_second": 4.299, "step": 1300 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.68, "eval_loss": 0.8232598304748535, "eval_runtime": 3.0259, "eval_samples_per_second": 33.049, "eval_steps_per_second": 4.296, "step": 1325 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.7, "eval_loss": 0.7931870818138123, "eval_runtime": 3.0274, "eval_samples_per_second": 33.032, "eval_steps_per_second": 4.294, "step": 1350 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.71, "eval_loss": 0.8861235976219177, "eval_runtime": 3.0352, "eval_samples_per_second": 32.947, "eval_steps_per_second": 4.283, "step": 1375 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.71, "eval_loss": 0.7876631021499634, "eval_runtime": 3.0388, "eval_samples_per_second": 32.908, "eval_steps_per_second": 4.278, "step": 1400 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.7, "eval_loss": 0.7689203023910522, "eval_runtime": 3.0331, "eval_samples_per_second": 32.97, "eval_steps_per_second": 4.286, "step": 1425 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.7, "eval_loss": 0.791912317276001, "eval_runtime": 3.0297, "eval_samples_per_second": 33.006, "eval_steps_per_second": 4.291, "step": 1450 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.7, "eval_loss": 0.7441076636314392, "eval_runtime": 3.0301, "eval_samples_per_second": 33.003, "eval_steps_per_second": 4.29, "step": 1475 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.0125, "loss": 0.3594, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.69, "eval_loss": 0.8327054381370544, "eval_runtime": 3.0351, "eval_samples_per_second": 32.948, "eval_steps_per_second": 4.283, "step": 1500 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.71, "eval_loss": 0.641401469707489, "eval_runtime": 3.0414, "eval_samples_per_second": 32.88, "eval_steps_per_second": 4.274, "step": 1525 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.71, "eval_loss": 0.6702091097831726, "eval_runtime": 3.0349, "eval_samples_per_second": 32.95, "eval_steps_per_second": 4.283, "step": 1550 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.71, "eval_loss": 0.6862046718597412, "eval_runtime": 3.0397, "eval_samples_per_second": 32.898, "eval_steps_per_second": 4.277, "step": 1575 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.68, "eval_loss": 0.6349497437477112, "eval_runtime": 3.0377, "eval_samples_per_second": 32.92, "eval_steps_per_second": 4.28, "step": 1600 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.69, "eval_loss": 0.6800190210342407, "eval_runtime": 3.0362, "eval_samples_per_second": 32.936, "eval_steps_per_second": 4.282, "step": 1625 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.69, "eval_loss": 0.7005452513694763, "eval_runtime": 3.035, "eval_samples_per_second": 32.949, "eval_steps_per_second": 4.283, "step": 1650 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.71, "eval_loss": 0.705758273601532, "eval_runtime": 3.0397, "eval_samples_per_second": 32.898, "eval_steps_per_second": 4.277, "step": 1675 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.73, "eval_loss": 0.6880057454109192, "eval_runtime": 3.0368, "eval_samples_per_second": 32.929, "eval_steps_per_second": 4.281, "step": 1700 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.72, "eval_loss": 0.6774427890777588, "eval_runtime": 3.035, "eval_samples_per_second": 32.949, "eval_steps_per_second": 4.283, "step": 1725 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.73, "eval_loss": 0.6815641522407532, "eval_runtime": 3.0352, "eval_samples_per_second": 32.947, "eval_steps_per_second": 4.283, "step": 1750 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.72, "eval_loss": 0.7138052582740784, "eval_runtime": 3.0374, "eval_samples_per_second": 32.923, "eval_steps_per_second": 4.28, "step": 1775 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.69, "eval_loss": 0.6311395168304443, "eval_runtime": 3.0362, "eval_samples_per_second": 32.936, "eval_steps_per_second": 4.282, "step": 1800 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.69, "eval_loss": 0.6578870415687561, "eval_runtime": 3.0358, "eval_samples_per_second": 32.94, "eval_steps_per_second": 4.282, "step": 1825 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.69, "eval_loss": 0.6955811381340027, "eval_runtime": 3.0392, "eval_samples_per_second": 32.903, "eval_steps_per_second": 4.277, "step": 1850 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.69, "eval_loss": 0.6341457366943359, "eval_runtime": 3.0349, "eval_samples_per_second": 32.95, "eval_steps_per_second": 4.283, "step": 1875 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.7, "eval_loss": 0.6722196936607361, "eval_runtime": 3.0452, "eval_samples_per_second": 32.838, "eval_steps_per_second": 4.269, "step": 1900 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.7, "eval_loss": 0.6459449529647827, "eval_runtime": 3.0323, "eval_samples_per_second": 32.978, "eval_steps_per_second": 4.287, "step": 1925 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.68, "eval_loss": 0.6350699663162231, "eval_runtime": 3.0361, "eval_samples_per_second": 32.937, "eval_steps_per_second": 4.282, "step": 1950 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.68, "eval_loss": 0.6435720324516296, "eval_runtime": 3.0398, "eval_samples_per_second": 32.897, "eval_steps_per_second": 4.277, "step": 1975 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.2323, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.69, "eval_loss": 0.6392772793769836, "eval_runtime": 3.0389, "eval_samples_per_second": 32.907, "eval_steps_per_second": 4.278, "step": 2000 }, { "best_epoch": 9, "best_eval_accuracy": 0.73, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.6464771995544434, "train_runtime": 1617.1485, "train_samples_per_second": 19.788, "train_steps_per_second": 1.237 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }