{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.46, "eval_loss": 0.49578723311424255, "eval_runtime": 2.9005, "eval_samples_per_second": 34.477, "eval_steps_per_second": 4.482, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.46, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.54, "eval_loss": 0.5955537557601929, "eval_runtime": 2.9308, "eval_samples_per_second": 34.12, "eval_steps_per_second": 4.436, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.54, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.45, "eval_loss": 0.5376533269882202, "eval_runtime": 2.9692, "eval_samples_per_second": 33.679, "eval_steps_per_second": 4.378, "step": 75 }, { "best_epoch": 1, "best_eval_accuracy": 0.54, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.61, "eval_loss": 0.4201565980911255, "eval_runtime": 2.9986, "eval_samples_per_second": 33.349, "eval_steps_per_second": 4.335, "step": 100 }, { "best_epoch": 3, "best_eval_accuracy": 0.61, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.44, "eval_loss": 0.43667927384376526, "eval_runtime": 3.0016, "eval_samples_per_second": 33.316, "eval_steps_per_second": 4.331, "step": 125 }, { "best_epoch": 3, "best_eval_accuracy": 0.61, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.51, "eval_loss": 0.43700677156448364, "eval_runtime": 3.0099, "eval_samples_per_second": 33.223, "eval_steps_per_second": 4.319, "step": 150 }, { "best_epoch": 3, "best_eval_accuracy": 0.61, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.66, "eval_loss": 0.42071959376335144, "eval_runtime": 3.0221, "eval_samples_per_second": 33.09, "eval_steps_per_second": 4.302, "step": 175 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.58, "eval_loss": 0.4423440098762512, "eval_runtime": 3.0262, "eval_samples_per_second": 33.044, "eval_steps_per_second": 4.296, "step": 200 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.61, "eval_loss": 0.41069385409355164, "eval_runtime": 3.0271, "eval_samples_per_second": 33.035, "eval_steps_per_second": 4.294, "step": 225 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.64, "eval_loss": 0.4332168996334076, "eval_runtime": 3.0297, "eval_samples_per_second": 33.007, "eval_steps_per_second": 4.291, "step": 250 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.6, "eval_loss": 0.4055112898349762, "eval_runtime": 3.031, "eval_samples_per_second": 32.992, "eval_steps_per_second": 4.289, "step": 275 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.63, "eval_loss": 0.4375588893890381, "eval_runtime": 3.0325, "eval_samples_per_second": 32.977, "eval_steps_per_second": 4.287, "step": 300 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.57, "eval_loss": 0.40624508261680603, "eval_runtime": 3.0305, "eval_samples_per_second": 32.998, "eval_steps_per_second": 4.29, "step": 325 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.61, "eval_loss": 0.4000048339366913, "eval_runtime": 3.0315, "eval_samples_per_second": 32.986, "eval_steps_per_second": 4.288, "step": 350 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.63, "eval_loss": 0.40517398715019226, "eval_runtime": 3.0336, "eval_samples_per_second": 32.965, "eval_steps_per_second": 4.285, "step": 375 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.68, "eval_loss": 0.3960806727409363, "eval_runtime": 3.0345, "eval_samples_per_second": 32.955, "eval_steps_per_second": 4.284, "step": 400 }, { "best_epoch": 15, "best_eval_accuracy": 0.68, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.67, "eval_loss": 0.3975551724433899, "eval_runtime": 3.0333, "eval_samples_per_second": 32.968, "eval_steps_per_second": 4.286, "step": 425 }, { "best_epoch": 15, "best_eval_accuracy": 0.68, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.65, "eval_loss": 0.41863301396369934, "eval_runtime": 3.0326, "eval_samples_per_second": 32.975, "eval_steps_per_second": 4.287, "step": 450 }, { "best_epoch": 15, "best_eval_accuracy": 0.68, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.63, "eval_loss": 0.4303971230983734, "eval_runtime": 3.0325, "eval_samples_per_second": 32.976, "eval_steps_per_second": 4.287, "step": 475 }, { "best_epoch": 15, "best_eval_accuracy": 0.68, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.015, "loss": 0.731, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.69, "eval_loss": 0.4357996881008148, "eval_runtime": 3.0345, "eval_samples_per_second": 32.955, "eval_steps_per_second": 4.284, "step": 500 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.68, "eval_loss": 0.41348427534103394, "eval_runtime": 3.0336, "eval_samples_per_second": 32.964, "eval_steps_per_second": 4.285, "step": 525 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.68, "eval_loss": 0.4180167019367218, "eval_runtime": 3.0311, "eval_samples_per_second": 32.991, "eval_steps_per_second": 4.289, "step": 550 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.66, "eval_loss": 0.46266496181488037, "eval_runtime": 3.0317, "eval_samples_per_second": 32.985, "eval_steps_per_second": 4.288, "step": 575 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.65, "eval_loss": 0.4150283932685852, "eval_runtime": 3.0391, "eval_samples_per_second": 32.904, "eval_steps_per_second": 4.278, "step": 600 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.67, "eval_loss": 0.4004598557949066, "eval_runtime": 3.032, "eval_samples_per_second": 32.982, "eval_steps_per_second": 4.288, "step": 625 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.7, "eval_loss": 0.4123203158378601, "eval_runtime": 3.0302, "eval_samples_per_second": 33.001, "eval_steps_per_second": 4.29, "step": 650 }, { "best_epoch": 25, "best_eval_accuracy": 0.7, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.69, "eval_loss": 0.4341800808906555, "eval_runtime": 3.0344, "eval_samples_per_second": 32.955, "eval_steps_per_second": 4.284, "step": 675 }, { "best_epoch": 25, "best_eval_accuracy": 0.7, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.67, "eval_loss": 0.45509403944015503, "eval_runtime": 3.0328, "eval_samples_per_second": 32.973, "eval_steps_per_second": 4.287, "step": 700 }, { "best_epoch": 25, "best_eval_accuracy": 0.7, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.69, "eval_loss": 0.42222651839256287, "eval_runtime": 3.0333, "eval_samples_per_second": 32.967, "eval_steps_per_second": 4.286, "step": 725 }, { "best_epoch": 25, "best_eval_accuracy": 0.7, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.71, "eval_loss": 0.4225788116455078, "eval_runtime": 3.0312, "eval_samples_per_second": 32.99, "eval_steps_per_second": 4.289, "step": 750 }, { "best_epoch": 29, "best_eval_accuracy": 0.71, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.69, "eval_loss": 0.47024014592170715, "eval_runtime": 3.0363, "eval_samples_per_second": 32.934, "eval_steps_per_second": 4.281, "step": 775 }, { "best_epoch": 29, "best_eval_accuracy": 0.71, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.7, "eval_loss": 0.41000667214393616, "eval_runtime": 3.0339, "eval_samples_per_second": 32.961, "eval_steps_per_second": 4.285, "step": 800 }, { "best_epoch": 29, "best_eval_accuracy": 0.71, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.69, "eval_loss": 0.4317716658115387, "eval_runtime": 3.0312, "eval_samples_per_second": 32.99, "eval_steps_per_second": 4.289, "step": 825 }, { "best_epoch": 29, "best_eval_accuracy": 0.71, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.71, "eval_loss": 0.4446772634983063, "eval_runtime": 3.0338, "eval_samples_per_second": 32.962, "eval_steps_per_second": 4.285, "step": 850 }, { "best_epoch": 29, "best_eval_accuracy": 0.71, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.72, "eval_loss": 0.3881460130214691, "eval_runtime": 3.0331, "eval_samples_per_second": 32.969, "eval_steps_per_second": 4.286, "step": 875 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.69, "eval_loss": 0.42335742712020874, "eval_runtime": 3.0313, "eval_samples_per_second": 32.989, "eval_steps_per_second": 4.289, "step": 900 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.69, "eval_loss": 0.4869128465652466, "eval_runtime": 3.0313, "eval_samples_per_second": 32.989, "eval_steps_per_second": 4.289, "step": 925 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.71, "eval_loss": 0.4352262020111084, "eval_runtime": 3.0335, "eval_samples_per_second": 32.965, "eval_steps_per_second": 4.285, "step": 950 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.71, "eval_loss": 0.4464748501777649, "eval_runtime": 3.0309, "eval_samples_per_second": 32.993, "eval_steps_per_second": 4.289, "step": 975 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.01, "loss": 0.5086, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.7, "eval_loss": 0.413508802652359, "eval_runtime": 3.0326, "eval_samples_per_second": 32.974, "eval_steps_per_second": 4.287, "step": 1000 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.7, "eval_loss": 0.4060712456703186, "eval_runtime": 3.0312, "eval_samples_per_second": 32.99, "eval_steps_per_second": 4.289, "step": 1025 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.72, "eval_loss": 0.44368666410446167, "eval_runtime": 3.0306, "eval_samples_per_second": 32.997, "eval_steps_per_second": 4.29, "step": 1050 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.72, "eval_loss": 0.4461361765861511, "eval_runtime": 3.0306, "eval_samples_per_second": 32.997, "eval_steps_per_second": 4.29, "step": 1075 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.69, "eval_loss": 0.41435202956199646, "eval_runtime": 3.0303, "eval_samples_per_second": 33.0, "eval_steps_per_second": 4.29, "step": 1100 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.71, "eval_loss": 0.3973015248775482, "eval_runtime": 3.0301, "eval_samples_per_second": 33.002, "eval_steps_per_second": 4.29, "step": 1125 }, { "best_epoch": 34, "best_eval_accuracy": 0.72, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.73, "eval_loss": 0.45110881328582764, "eval_runtime": 3.0313, "eval_samples_per_second": 32.989, "eval_steps_per_second": 4.289, "step": 1150 }, { "best_epoch": 45, "best_eval_accuracy": 0.73, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.71, "eval_loss": 0.4273070991039276, "eval_runtime": 3.0301, "eval_samples_per_second": 33.002, "eval_steps_per_second": 4.29, "step": 1175 }, { "best_epoch": 45, "best_eval_accuracy": 0.73, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.71, "eval_loss": 0.4100435972213745, "eval_runtime": 3.0299, "eval_samples_per_second": 33.004, "eval_steps_per_second": 4.291, "step": 1200 }, { "best_epoch": 45, "best_eval_accuracy": 0.73, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.72, "eval_loss": 0.4208601415157318, "eval_runtime": 3.0294, "eval_samples_per_second": 33.01, "eval_steps_per_second": 4.291, "step": 1225 }, { "best_epoch": 45, "best_eval_accuracy": 0.73, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.74, "eval_loss": 0.41906824707984924, "eval_runtime": 3.0315, "eval_samples_per_second": 32.987, "eval_steps_per_second": 4.288, "step": 1250 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.74, "eval_loss": 0.40232476592063904, "eval_runtime": 3.0299, "eval_samples_per_second": 33.004, "eval_steps_per_second": 4.291, "step": 1275 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.72, "eval_loss": 0.40375789999961853, "eval_runtime": 3.0318, "eval_samples_per_second": 32.984, "eval_steps_per_second": 4.288, "step": 1300 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.73, "eval_loss": 0.41481122374534607, "eval_runtime": 3.0329, "eval_samples_per_second": 32.972, "eval_steps_per_second": 4.286, "step": 1325 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.72, "eval_loss": 0.42626023292541504, "eval_runtime": 3.0313, "eval_samples_per_second": 32.989, "eval_steps_per_second": 4.289, "step": 1350 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.73, "eval_loss": 0.43311816453933716, "eval_runtime": 3.035, "eval_samples_per_second": 32.949, "eval_steps_per_second": 4.283, "step": 1375 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.71, "eval_loss": 0.4372536838054657, "eval_runtime": 3.0311, "eval_samples_per_second": 32.991, "eval_steps_per_second": 4.289, "step": 1400 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.72, "eval_loss": 0.40811866521835327, "eval_runtime": 3.0315, "eval_samples_per_second": 32.987, "eval_steps_per_second": 4.288, "step": 1425 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.71, "eval_loss": 0.40780752897262573, "eval_runtime": 3.0299, "eval_samples_per_second": 33.004, "eval_steps_per_second": 4.291, "step": 1450 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.72, "eval_loss": 0.42498713731765747, "eval_runtime": 3.0363, "eval_samples_per_second": 32.935, "eval_steps_per_second": 4.282, "step": 1475 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.005, "loss": 0.4268, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.7, "eval_loss": 0.4223678708076477, "eval_runtime": 3.0288, "eval_samples_per_second": 33.016, "eval_steps_per_second": 4.292, "step": 1500 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.7, "eval_loss": 0.4255259335041046, "eval_runtime": 3.0305, "eval_samples_per_second": 32.997, "eval_steps_per_second": 4.29, "step": 1525 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.72, "eval_loss": 0.41144871711730957, "eval_runtime": 3.0322, "eval_samples_per_second": 32.98, "eval_steps_per_second": 4.287, "step": 1550 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.72, "eval_loss": 0.42656296491622925, "eval_runtime": 3.0304, "eval_samples_per_second": 32.999, "eval_steps_per_second": 4.29, "step": 1575 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.72, "eval_loss": 0.4097367227077484, "eval_runtime": 3.03, "eval_samples_per_second": 33.004, "eval_steps_per_second": 4.29, "step": 1600 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.72, "eval_loss": 0.40528982877731323, "eval_runtime": 3.0318, "eval_samples_per_second": 32.983, "eval_steps_per_second": 4.288, "step": 1625 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.71, "eval_loss": 0.4050586223602295, "eval_runtime": 3.0289, "eval_samples_per_second": 33.015, "eval_steps_per_second": 4.292, "step": 1650 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.73, "eval_loss": 0.41345199942588806, "eval_runtime": 3.031, "eval_samples_per_second": 32.993, "eval_steps_per_second": 4.289, "step": 1675 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.74, "eval_loss": 0.3958780765533447, "eval_runtime": 3.0359, "eval_samples_per_second": 32.939, "eval_steps_per_second": 4.282, "step": 1700 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.72, "eval_loss": 0.4162384867668152, "eval_runtime": 3.0305, "eval_samples_per_second": 32.998, "eval_steps_per_second": 4.29, "step": 1725 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.73, "eval_loss": 0.4060596227645874, "eval_runtime": 3.0315, "eval_samples_per_second": 32.986, "eval_steps_per_second": 4.288, "step": 1750 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.71, "eval_loss": 0.4015595614910126, "eval_runtime": 3.03, "eval_samples_per_second": 33.003, "eval_steps_per_second": 4.29, "step": 1775 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.71, "eval_loss": 0.4193567931652069, "eval_runtime": 3.0293, "eval_samples_per_second": 33.011, "eval_steps_per_second": 4.291, "step": 1800 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.72, "eval_loss": 0.4097726345062256, "eval_runtime": 3.0293, "eval_samples_per_second": 33.011, "eval_steps_per_second": 4.291, "step": 1825 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.71, "eval_loss": 0.41785815358161926, "eval_runtime": 3.0313, "eval_samples_per_second": 32.989, "eval_steps_per_second": 4.289, "step": 1850 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.71, "eval_loss": 0.41046372056007385, "eval_runtime": 3.0316, "eval_samples_per_second": 32.986, "eval_steps_per_second": 4.288, "step": 1875 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.72, "eval_loss": 0.413968950510025, "eval_runtime": 3.0295, "eval_samples_per_second": 33.008, "eval_steps_per_second": 4.291, "step": 1900 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.73, "eval_loss": 0.4080766439437866, "eval_runtime": 3.0307, "eval_samples_per_second": 32.996, "eval_steps_per_second": 4.289, "step": 1925 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.73, "eval_loss": 0.40437057614326477, "eval_runtime": 3.0312, "eval_samples_per_second": 32.99, "eval_steps_per_second": 4.289, "step": 1950 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.72, "eval_loss": 0.3996027410030365, "eval_runtime": 3.033, "eval_samples_per_second": 32.971, "eval_steps_per_second": 4.286, "step": 1975 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.3915, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.72, "eval_loss": 0.40144088864326477, "eval_runtime": 3.0353, "eval_samples_per_second": 32.946, "eval_steps_per_second": 4.283, "step": 2000 }, { "best_epoch": 49, "best_eval_accuracy": 0.74, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.5144821853637696, "train_runtime": 1625.7247, "train_samples_per_second": 19.684, "train_steps_per_second": 1.23 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }