{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.65, "eval_loss": 0.5447632074356079, "eval_runtime": 2.9004, "eval_samples_per_second": 34.478, "eval_steps_per_second": 4.482, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.65, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.66, "eval_loss": 0.5351557731628418, "eval_runtime": 2.9353, "eval_samples_per_second": 34.068, "eval_steps_per_second": 4.429, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.65, "eval_loss": 0.5466512441635132, "eval_runtime": 2.9987, "eval_samples_per_second": 33.348, "eval_steps_per_second": 4.335, "step": 75 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.65, "eval_loss": 0.5432061553001404, "eval_runtime": 3.0354, "eval_samples_per_second": 32.945, "eval_steps_per_second": 4.283, "step": 100 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.66, "eval_loss": 0.5446352958679199, "eval_runtime": 3.0683, "eval_samples_per_second": 32.591, "eval_steps_per_second": 4.237, "step": 125 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.63, "eval_loss": 0.5419210195541382, "eval_runtime": 3.0777, "eval_samples_per_second": 32.492, "eval_steps_per_second": 4.224, "step": 150 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.66, "eval_loss": 0.5364170074462891, "eval_runtime": 3.075, "eval_samples_per_second": 32.52, "eval_steps_per_second": 4.228, "step": 175 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.66, "eval_loss": 0.5399757027626038, "eval_runtime": 3.0939, "eval_samples_per_second": 32.322, "eval_steps_per_second": 4.202, "step": 200 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.66, "eval_loss": 0.5460018515586853, "eval_runtime": 3.0861, "eval_samples_per_second": 32.404, "eval_steps_per_second": 4.212, "step": 225 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.66, "eval_loss": 0.547890305519104, "eval_runtime": 3.0888, "eval_samples_per_second": 32.375, "eval_steps_per_second": 4.209, "step": 250 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.66, "eval_loss": 0.542870819568634, "eval_runtime": 3.0902, "eval_samples_per_second": 32.361, "eval_steps_per_second": 4.207, "step": 275 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.66, "eval_loss": 0.5363436341285706, "eval_runtime": 3.0944, "eval_samples_per_second": 32.316, "eval_steps_per_second": 4.201, "step": 300 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.66, "eval_loss": 0.5431585907936096, "eval_runtime": 3.0952, "eval_samples_per_second": 32.308, "eval_steps_per_second": 4.2, "step": 325 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.63, "eval_loss": 0.5446100831031799, "eval_runtime": 3.0959, "eval_samples_per_second": 32.301, "eval_steps_per_second": 4.199, "step": 350 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.65, "eval_loss": 0.5619304180145264, "eval_runtime": 3.0987, "eval_samples_per_second": 32.272, "eval_steps_per_second": 4.195, "step": 375 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.66, "eval_loss": 0.5399982929229736, "eval_runtime": 3.0957, "eval_samples_per_second": 32.303, "eval_steps_per_second": 4.199, "step": 400 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.66, "eval_loss": 0.5394828915596008, "eval_runtime": 3.0984, "eval_samples_per_second": 32.274, "eval_steps_per_second": 4.196, "step": 425 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.66, "eval_loss": 0.5439099073410034, "eval_runtime": 3.0994, "eval_samples_per_second": 32.264, "eval_steps_per_second": 4.194, "step": 450 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.66, "eval_loss": 0.5420183539390564, "eval_runtime": 3.0999, "eval_samples_per_second": 32.259, "eval_steps_per_second": 4.194, "step": 475 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.00075, "loss": 0.6126, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.66, "eval_loss": 0.5401513576507568, "eval_runtime": 3.0996, "eval_samples_per_second": 32.262, "eval_steps_per_second": 4.194, "step": 500 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.65, "eval_loss": 0.5430670976638794, "eval_runtime": 3.1006, "eval_samples_per_second": 32.252, "eval_steps_per_second": 4.193, "step": 525 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.62, "eval_loss": 0.5421171188354492, "eval_runtime": 3.1013, "eval_samples_per_second": 32.245, "eval_steps_per_second": 4.192, "step": 550 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.65, "eval_loss": 0.5431703329086304, "eval_runtime": 3.1002, "eval_samples_per_second": 32.256, "eval_steps_per_second": 4.193, "step": 575 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.65, "eval_loss": 0.5438470244407654, "eval_runtime": 3.0981, "eval_samples_per_second": 32.277, "eval_steps_per_second": 4.196, "step": 600 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.64, "eval_loss": 0.5364149212837219, "eval_runtime": 3.1003, "eval_samples_per_second": 32.255, "eval_steps_per_second": 4.193, "step": 625 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.63, "eval_loss": 0.541420578956604, "eval_runtime": 3.0998, "eval_samples_per_second": 32.26, "eval_steps_per_second": 4.194, "step": 650 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.65, "eval_loss": 0.5394607782363892, "eval_runtime": 3.0995, "eval_samples_per_second": 32.263, "eval_steps_per_second": 4.194, "step": 675 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.65, "eval_loss": 0.5440071225166321, "eval_runtime": 3.1009, "eval_samples_per_second": 32.249, "eval_steps_per_second": 4.192, "step": 700 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.63, "eval_loss": 0.5445890426635742, "eval_runtime": 3.1002, "eval_samples_per_second": 32.256, "eval_steps_per_second": 4.193, "step": 725 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.59, "eval_loss": 0.5472090840339661, "eval_runtime": 3.1018, "eval_samples_per_second": 32.239, "eval_steps_per_second": 4.191, "step": 750 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.65, "eval_loss": 0.5418501496315002, "eval_runtime": 3.0989, "eval_samples_per_second": 32.27, "eval_steps_per_second": 4.195, "step": 775 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.62, "eval_loss": 0.5413060188293457, "eval_runtime": 3.1001, "eval_samples_per_second": 32.257, "eval_steps_per_second": 4.193, "step": 800 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.62, "eval_loss": 0.5530202388763428, "eval_runtime": 3.1009, "eval_samples_per_second": 32.249, "eval_steps_per_second": 4.192, "step": 825 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.62, "eval_loss": 0.5460859537124634, "eval_runtime": 3.1, "eval_samples_per_second": 32.258, "eval_steps_per_second": 4.194, "step": 850 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.64, "eval_loss": 0.5440330505371094, "eval_runtime": 3.1019, "eval_samples_per_second": 32.238, "eval_steps_per_second": 4.191, "step": 875 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.64, "eval_loss": 0.5436714291572571, "eval_runtime": 3.1024, "eval_samples_per_second": 32.234, "eval_steps_per_second": 4.19, "step": 900 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.63, "eval_loss": 0.543533444404602, "eval_runtime": 3.1035, "eval_samples_per_second": 32.221, "eval_steps_per_second": 4.189, "step": 925 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.63, "eval_loss": 0.5481903553009033, "eval_runtime": 3.1013, "eval_samples_per_second": 32.244, "eval_steps_per_second": 4.192, "step": 950 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.64, "eval_loss": 0.5449146628379822, "eval_runtime": 3.1036, "eval_samples_per_second": 32.221, "eval_steps_per_second": 4.189, "step": 975 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.0005, "loss": 0.6037, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.64, "eval_loss": 0.5441970229148865, "eval_runtime": 3.1022, "eval_samples_per_second": 32.235, "eval_steps_per_second": 4.191, "step": 1000 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.62, "eval_loss": 0.5376811027526855, "eval_runtime": 3.1035, "eval_samples_per_second": 32.222, "eval_steps_per_second": 4.189, "step": 1025 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.64, "eval_loss": 0.5410917401313782, "eval_runtime": 3.1029, "eval_samples_per_second": 32.228, "eval_steps_per_second": 4.19, "step": 1050 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.59, "eval_loss": 0.5482169985771179, "eval_runtime": 3.1058, "eval_samples_per_second": 32.198, "eval_steps_per_second": 4.186, "step": 1075 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.62, "eval_loss": 0.5494033694267273, "eval_runtime": 3.1008, "eval_samples_per_second": 32.25, "eval_steps_per_second": 4.193, "step": 1100 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.6, "eval_loss": 0.5510344505310059, "eval_runtime": 3.1036, "eval_samples_per_second": 32.221, "eval_steps_per_second": 4.189, "step": 1125 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.61, "eval_loss": 0.5471860766410828, "eval_runtime": 3.1029, "eval_samples_per_second": 32.228, "eval_steps_per_second": 4.19, "step": 1150 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.64, "eval_loss": 0.5415754318237305, "eval_runtime": 3.1057, "eval_samples_per_second": 32.199, "eval_steps_per_second": 4.186, "step": 1175 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.64, "eval_loss": 0.5396535396575928, "eval_runtime": 3.1031, "eval_samples_per_second": 32.226, "eval_steps_per_second": 4.189, "step": 1200 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.64, "eval_loss": 0.5417464375495911, "eval_runtime": 3.1027, "eval_samples_per_second": 32.23, "eval_steps_per_second": 4.19, "step": 1225 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.64, "eval_loss": 0.5390459895133972, "eval_runtime": 3.1051, "eval_samples_per_second": 32.205, "eval_steps_per_second": 4.187, "step": 1250 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.63, "eval_loss": 0.5389121174812317, "eval_runtime": 3.1044, "eval_samples_per_second": 32.213, "eval_steps_per_second": 4.188, "step": 1275 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.64, "eval_loss": 0.5365983843803406, "eval_runtime": 3.1052, "eval_samples_per_second": 32.204, "eval_steps_per_second": 4.187, "step": 1300 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.64, "eval_loss": 0.5367568731307983, "eval_runtime": 3.1042, "eval_samples_per_second": 32.215, "eval_steps_per_second": 4.188, "step": 1325 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.64, "eval_loss": 0.5392716526985168, "eval_runtime": 3.1024, "eval_samples_per_second": 32.233, "eval_steps_per_second": 4.19, "step": 1350 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.64, "eval_loss": 0.5378238558769226, "eval_runtime": 3.1049, "eval_samples_per_second": 32.207, "eval_steps_per_second": 4.187, "step": 1375 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.64, "eval_loss": 0.5391019582748413, "eval_runtime": 3.106, "eval_samples_per_second": 32.196, "eval_steps_per_second": 4.185, "step": 1400 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.63, "eval_loss": 0.5383239388465881, "eval_runtime": 3.1054, "eval_samples_per_second": 32.202, "eval_steps_per_second": 4.186, "step": 1425 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.63, "eval_loss": 0.5379219055175781, "eval_runtime": 3.1022, "eval_samples_per_second": 32.235, "eval_steps_per_second": 4.191, "step": 1450 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.64, "eval_loss": 0.5381463766098022, "eval_runtime": 3.1059, "eval_samples_per_second": 32.197, "eval_steps_per_second": 4.186, "step": 1475 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.00025, "loss": 0.6021, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.64, "eval_loss": 0.5409640669822693, "eval_runtime": 3.1098, "eval_samples_per_second": 32.156, "eval_steps_per_second": 4.18, "step": 1500 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.64, "eval_loss": 0.5401256084442139, "eval_runtime": 3.1056, "eval_samples_per_second": 32.2, "eval_steps_per_second": 4.186, "step": 1525 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.64, "eval_loss": 0.5403258800506592, "eval_runtime": 3.1087, "eval_samples_per_second": 32.167, "eval_steps_per_second": 4.182, "step": 1550 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.64, "eval_loss": 0.5411276817321777, "eval_runtime": 3.103, "eval_samples_per_second": 32.226, "eval_steps_per_second": 4.189, "step": 1575 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.64, "eval_loss": 0.5414915680885315, "eval_runtime": 3.1078, "eval_samples_per_second": 32.177, "eval_steps_per_second": 4.183, "step": 1600 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.64, "eval_loss": 0.5414855480194092, "eval_runtime": 3.1111, "eval_samples_per_second": 32.143, "eval_steps_per_second": 4.179, "step": 1625 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.64, "eval_loss": 0.5408964157104492, "eval_runtime": 3.1087, "eval_samples_per_second": 32.168, "eval_steps_per_second": 4.182, "step": 1650 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.64, "eval_loss": 0.5418642163276672, "eval_runtime": 3.1104, "eval_samples_per_second": 32.15, "eval_steps_per_second": 4.179, "step": 1675 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.64, "eval_loss": 0.5401394367218018, "eval_runtime": 3.1106, "eval_samples_per_second": 32.148, "eval_steps_per_second": 4.179, "step": 1700 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.64, "eval_loss": 0.5424190163612366, "eval_runtime": 3.1114, "eval_samples_per_second": 32.139, "eval_steps_per_second": 4.178, "step": 1725 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.64, "eval_loss": 0.5420389175415039, "eval_runtime": 3.1113, "eval_samples_per_second": 32.141, "eval_steps_per_second": 4.178, "step": 1750 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.64, "eval_loss": 0.541526734828949, "eval_runtime": 3.1099, "eval_samples_per_second": 32.155, "eval_steps_per_second": 4.18, "step": 1775 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.64, "eval_loss": 0.5391311049461365, "eval_runtime": 3.1129, "eval_samples_per_second": 32.124, "eval_steps_per_second": 4.176, "step": 1800 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.64, "eval_loss": 0.5395915508270264, "eval_runtime": 3.1177, "eval_samples_per_second": 32.075, "eval_steps_per_second": 4.17, "step": 1825 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.64, "eval_loss": 0.5395561456680298, "eval_runtime": 3.1126, "eval_samples_per_second": 32.128, "eval_steps_per_second": 4.177, "step": 1850 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.64, "eval_loss": 0.54047030210495, "eval_runtime": 3.1118, "eval_samples_per_second": 32.135, "eval_steps_per_second": 4.178, "step": 1875 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.64, "eval_loss": 0.5403570532798767, "eval_runtime": 3.1117, "eval_samples_per_second": 32.137, "eval_steps_per_second": 4.178, "step": 1900 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.64, "eval_loss": 0.5400457978248596, "eval_runtime": 3.122, "eval_samples_per_second": 32.031, "eval_steps_per_second": 4.164, "step": 1925 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.64, "eval_loss": 0.5401412844657898, "eval_runtime": 3.1203, "eval_samples_per_second": 32.048, "eval_steps_per_second": 4.166, "step": 1950 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.64, "eval_loss": 0.5402973890304565, "eval_runtime": 3.1111, "eval_samples_per_second": 32.143, "eval_steps_per_second": 4.179, "step": 1975 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.5946, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.64, "eval_loss": 0.5402593612670898, "eval_runtime": 3.1254, "eval_samples_per_second": 31.995, "eval_steps_per_second": 4.159, "step": 2000 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.6032544250488281, "train_runtime": 1653.7835, "train_samples_per_second": 19.35, "train_steps_per_second": 1.209 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }