{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.66, "eval_loss": 0.6010557413101196, "eval_runtime": 2.8952, "eval_samples_per_second": 34.539, "eval_steps_per_second": 4.49, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.65, "eval_loss": 0.5991448163986206, "eval_runtime": 2.9197, "eval_samples_per_second": 34.25, "eval_steps_per_second": 4.453, "step": 50 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.65, "eval_loss": 0.5983209609985352, "eval_runtime": 2.9581, "eval_samples_per_second": 33.806, "eval_steps_per_second": 4.395, "step": 75 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.65, "eval_loss": 0.6062744855880737, "eval_runtime": 2.9913, "eval_samples_per_second": 33.43, "eval_steps_per_second": 4.346, "step": 100 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.65, "eval_loss": 0.5972874164581299, "eval_runtime": 3.0135, "eval_samples_per_second": 33.184, "eval_steps_per_second": 4.314, "step": 125 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.65, "eval_loss": 0.6048508286476135, "eval_runtime": 3.0172, "eval_samples_per_second": 33.143, "eval_steps_per_second": 4.309, "step": 150 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.65, "eval_loss": 0.6030882596969604, "eval_runtime": 3.0217, "eval_samples_per_second": 33.094, "eval_steps_per_second": 4.302, "step": 175 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.65, "eval_loss": 0.6000898480415344, "eval_runtime": 3.0239, "eval_samples_per_second": 33.07, "eval_steps_per_second": 4.299, "step": 200 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.64, "eval_loss": 0.5968500971794128, "eval_runtime": 3.0268, "eval_samples_per_second": 33.038, "eval_steps_per_second": 4.295, "step": 225 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.65, "eval_loss": 0.6007280945777893, "eval_runtime": 3.0282, "eval_samples_per_second": 33.023, "eval_steps_per_second": 4.293, "step": 250 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.65, "eval_loss": 0.6015586853027344, "eval_runtime": 3.027, "eval_samples_per_second": 33.036, "eval_steps_per_second": 4.295, "step": 275 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.65, "eval_loss": 0.5992419123649597, "eval_runtime": 3.0339, "eval_samples_per_second": 32.961, "eval_steps_per_second": 4.285, "step": 300 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.65, "eval_loss": 0.5967671275138855, "eval_runtime": 3.0311, "eval_samples_per_second": 32.992, "eval_steps_per_second": 4.289, "step": 325 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.65, "eval_loss": 0.596794605255127, "eval_runtime": 3.0286, "eval_samples_per_second": 33.018, "eval_steps_per_second": 4.292, "step": 350 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.65, "eval_loss": 0.600039541721344, "eval_runtime": 3.0308, "eval_samples_per_second": 32.995, "eval_steps_per_second": 4.289, "step": 375 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.65, "eval_loss": 0.6000267267227173, "eval_runtime": 3.0298, "eval_samples_per_second": 33.006, "eval_steps_per_second": 4.291, "step": 400 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.66, "eval_loss": 0.5882958769798279, "eval_runtime": 3.029, "eval_samples_per_second": 33.015, "eval_steps_per_second": 4.292, "step": 425 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.65, "eval_loss": 0.5920267701148987, "eval_runtime": 3.0314, "eval_samples_per_second": 32.988, "eval_steps_per_second": 4.288, "step": 450 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.62, "eval_loss": 0.6034604907035828, "eval_runtime": 3.0306, "eval_samples_per_second": 32.997, "eval_steps_per_second": 4.29, "step": 475 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.00075, "loss": 0.6519, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.64, "eval_loss": 0.6074941158294678, "eval_runtime": 3.0321, "eval_samples_per_second": 32.981, "eval_steps_per_second": 4.287, "step": 500 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.65, "eval_loss": 0.5919292569160461, "eval_runtime": 3.03, "eval_samples_per_second": 33.003, "eval_steps_per_second": 4.29, "step": 525 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.63, "eval_loss": 0.5951123237609863, "eval_runtime": 3.0298, "eval_samples_per_second": 33.006, "eval_steps_per_second": 4.291, "step": 550 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.61, "eval_loss": 0.603710412979126, "eval_runtime": 3.0294, "eval_samples_per_second": 33.009, "eval_steps_per_second": 4.291, "step": 575 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.62, "eval_loss": 0.6057878732681274, "eval_runtime": 3.0274, "eval_samples_per_second": 33.032, "eval_steps_per_second": 4.294, "step": 600 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.65, "eval_loss": 0.5943801999092102, "eval_runtime": 3.0273, "eval_samples_per_second": 33.033, "eval_steps_per_second": 4.294, "step": 625 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.65, "eval_loss": 0.5938175320625305, "eval_runtime": 3.0318, "eval_samples_per_second": 32.983, "eval_steps_per_second": 4.288, "step": 650 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.66, "eval_loss": 0.5908843278884888, "eval_runtime": 3.0311, "eval_samples_per_second": 32.991, "eval_steps_per_second": 4.289, "step": 675 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.65, "eval_loss": 0.5914073586463928, "eval_runtime": 3.0408, "eval_samples_per_second": 32.886, "eval_steps_per_second": 4.275, "step": 700 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.66, "eval_loss": 0.5901620388031006, "eval_runtime": 3.0355, "eval_samples_per_second": 32.943, "eval_steps_per_second": 4.283, "step": 725 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.66, "eval_loss": 0.5905589461326599, "eval_runtime": 3.0236, "eval_samples_per_second": 33.073, "eval_steps_per_second": 4.3, "step": 750 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.65, "eval_loss": 0.5935985445976257, "eval_runtime": 3.023, "eval_samples_per_second": 33.08, "eval_steps_per_second": 4.3, "step": 775 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.66, "eval_loss": 0.5959981679916382, "eval_runtime": 3.0241, "eval_samples_per_second": 33.068, "eval_steps_per_second": 4.299, "step": 800 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.65, "eval_loss": 0.595251739025116, "eval_runtime": 3.0223, "eval_samples_per_second": 33.088, "eval_steps_per_second": 4.301, "step": 825 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.65, "eval_loss": 0.597002387046814, "eval_runtime": 3.0261, "eval_samples_per_second": 33.046, "eval_steps_per_second": 4.296, "step": 850 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.65, "eval_loss": 0.593684732913971, "eval_runtime": 3.0258, "eval_samples_per_second": 33.049, "eval_steps_per_second": 4.296, "step": 875 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.64, "eval_loss": 0.5954169034957886, "eval_runtime": 3.0248, "eval_samples_per_second": 33.06, "eval_steps_per_second": 4.298, "step": 900 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.63, "eval_loss": 0.5992526412010193, "eval_runtime": 3.0254, "eval_samples_per_second": 33.053, "eval_steps_per_second": 4.297, "step": 925 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.65, "eval_loss": 0.5905333161354065, "eval_runtime": 3.0228, "eval_samples_per_second": 33.082, "eval_steps_per_second": 4.301, "step": 950 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.65, "eval_loss": 0.5898448824882507, "eval_runtime": 3.0254, "eval_samples_per_second": 33.053, "eval_steps_per_second": 4.297, "step": 975 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.0005, "loss": 0.6395, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.65, "eval_loss": 0.5947265028953552, "eval_runtime": 3.0242, "eval_samples_per_second": 33.067, "eval_steps_per_second": 4.299, "step": 1000 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.64, "eval_loss": 0.5965713262557983, "eval_runtime": 3.0253, "eval_samples_per_second": 33.054, "eval_steps_per_second": 4.297, "step": 1025 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.65, "eval_loss": 0.595311164855957, "eval_runtime": 3.0266, "eval_samples_per_second": 33.04, "eval_steps_per_second": 4.295, "step": 1050 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.64, "eval_loss": 0.59684818983078, "eval_runtime": 3.0241, "eval_samples_per_second": 33.067, "eval_steps_per_second": 4.299, "step": 1075 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.65, "eval_loss": 0.5934141874313354, "eval_runtime": 3.0236, "eval_samples_per_second": 33.074, "eval_steps_per_second": 4.3, "step": 1100 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.66, "eval_loss": 0.59480881690979, "eval_runtime": 3.0258, "eval_samples_per_second": 33.049, "eval_steps_per_second": 4.296, "step": 1125 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.65, "eval_loss": 0.5958330035209656, "eval_runtime": 3.0235, "eval_samples_per_second": 33.074, "eval_steps_per_second": 4.3, "step": 1150 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.65, "eval_loss": 0.5928426384925842, "eval_runtime": 3.0224, "eval_samples_per_second": 33.086, "eval_steps_per_second": 4.301, "step": 1175 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.65, "eval_loss": 0.5922430157661438, "eval_runtime": 3.0243, "eval_samples_per_second": 33.065, "eval_steps_per_second": 4.298, "step": 1200 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.65, "eval_loss": 0.5928506255149841, "eval_runtime": 3.0246, "eval_samples_per_second": 33.063, "eval_steps_per_second": 4.298, "step": 1225 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.64, "eval_loss": 0.5967039465904236, "eval_runtime": 3.0208, "eval_samples_per_second": 33.103, "eval_steps_per_second": 4.303, "step": 1250 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.65, "eval_loss": 0.5907678008079529, "eval_runtime": 3.0228, "eval_samples_per_second": 33.082, "eval_steps_per_second": 4.301, "step": 1275 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.66, "eval_loss": 0.5929586887359619, "eval_runtime": 3.0225, "eval_samples_per_second": 33.085, "eval_steps_per_second": 4.301, "step": 1300 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.65, "eval_loss": 0.590965986251831, "eval_runtime": 3.0223, "eval_samples_per_second": 33.088, "eval_steps_per_second": 4.301, "step": 1325 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.65, "eval_loss": 0.5930989980697632, "eval_runtime": 3.0227, "eval_samples_per_second": 33.083, "eval_steps_per_second": 4.301, "step": 1350 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.66, "eval_loss": 0.5900382399559021, "eval_runtime": 3.022, "eval_samples_per_second": 33.091, "eval_steps_per_second": 4.302, "step": 1375 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.65, "eval_loss": 0.5924661159515381, "eval_runtime": 3.0221, "eval_samples_per_second": 33.09, "eval_steps_per_second": 4.302, "step": 1400 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.66, "eval_loss": 0.5937650203704834, "eval_runtime": 3.0236, "eval_samples_per_second": 33.073, "eval_steps_per_second": 4.3, "step": 1425 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.65, "eval_loss": 0.5963329076766968, "eval_runtime": 3.0249, "eval_samples_per_second": 33.059, "eval_steps_per_second": 4.298, "step": 1450 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.64, "eval_loss": 0.5954621434211731, "eval_runtime": 3.0247, "eval_samples_per_second": 33.061, "eval_steps_per_second": 4.298, "step": 1475 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.00025, "loss": 0.6331, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.65, "eval_loss": 0.5935218930244446, "eval_runtime": 3.0258, "eval_samples_per_second": 33.049, "eval_steps_per_second": 4.296, "step": 1500 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.66, "eval_loss": 0.5937374830245972, "eval_runtime": 3.0237, "eval_samples_per_second": 33.072, "eval_steps_per_second": 4.299, "step": 1525 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.65, "eval_loss": 0.5923505425453186, "eval_runtime": 3.0211, "eval_samples_per_second": 33.101, "eval_steps_per_second": 4.303, "step": 1550 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.65, "eval_loss": 0.5909485816955566, "eval_runtime": 3.0219, "eval_samples_per_second": 33.092, "eval_steps_per_second": 4.302, "step": 1575 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.65, "eval_loss": 0.589134931564331, "eval_runtime": 3.026, "eval_samples_per_second": 33.047, "eval_steps_per_second": 4.296, "step": 1600 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.65, "eval_loss": 0.5880649089813232, "eval_runtime": 3.0289, "eval_samples_per_second": 33.015, "eval_steps_per_second": 4.292, "step": 1625 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.65, "eval_loss": 0.5884314775466919, "eval_runtime": 3.0234, "eval_samples_per_second": 33.075, "eval_steps_per_second": 4.3, "step": 1650 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.65, "eval_loss": 0.5893000960350037, "eval_runtime": 3.0241, "eval_samples_per_second": 33.067, "eval_steps_per_second": 4.299, "step": 1675 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.65, "eval_loss": 0.5899676084518433, "eval_runtime": 3.028, "eval_samples_per_second": 33.025, "eval_steps_per_second": 4.293, "step": 1700 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.65, "eval_loss": 0.5908388495445251, "eval_runtime": 3.0265, "eval_samples_per_second": 33.041, "eval_steps_per_second": 4.295, "step": 1725 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.65, "eval_loss": 0.5911998748779297, "eval_runtime": 3.0244, "eval_samples_per_second": 33.065, "eval_steps_per_second": 4.298, "step": 1750 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.65, "eval_loss": 0.5914051532745361, "eval_runtime": 3.0277, "eval_samples_per_second": 33.029, "eval_steps_per_second": 4.294, "step": 1775 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.65, "eval_loss": 0.5901281237602234, "eval_runtime": 3.0271, "eval_samples_per_second": 33.035, "eval_steps_per_second": 4.294, "step": 1800 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.65, "eval_loss": 0.5898309350013733, "eval_runtime": 3.0251, "eval_samples_per_second": 33.057, "eval_steps_per_second": 4.297, "step": 1825 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.65, "eval_loss": 0.589576005935669, "eval_runtime": 3.0338, "eval_samples_per_second": 32.961, "eval_steps_per_second": 4.285, "step": 1850 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.65, "eval_loss": 0.5904554128646851, "eval_runtime": 3.0252, "eval_samples_per_second": 33.055, "eval_steps_per_second": 4.297, "step": 1875 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.65, "eval_loss": 0.5900978446006775, "eval_runtime": 3.0257, "eval_samples_per_second": 33.05, "eval_steps_per_second": 4.296, "step": 1900 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.65, "eval_loss": 0.5900523662567139, "eval_runtime": 3.0269, "eval_samples_per_second": 33.037, "eval_steps_per_second": 4.295, "step": 1925 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.65, "eval_loss": 0.5899617671966553, "eval_runtime": 3.0271, "eval_samples_per_second": 33.034, "eval_steps_per_second": 4.294, "step": 1950 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.65, "eval_loss": 0.5899820327758789, "eval_runtime": 3.0253, "eval_samples_per_second": 33.055, "eval_steps_per_second": 4.297, "step": 1975 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.6276, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.65, "eval_loss": 0.5899689793586731, "eval_runtime": 3.0275, "eval_samples_per_second": 33.03, "eval_steps_per_second": 4.294, "step": 2000 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.6380199432373047, "train_runtime": 1620.376, "train_samples_per_second": 19.749, "train_steps_per_second": 1.234 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }