{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.4, "eval_loss": 0.7262819409370422, "eval_runtime": 2.9351, "eval_samples_per_second": 34.071, "eval_steps_per_second": 4.429, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.4, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.6, "eval_loss": 0.6114884614944458, "eval_runtime": 2.9878, "eval_samples_per_second": 33.47, "eval_steps_per_second": 4.351, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.6, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.62, "eval_loss": 0.5427446365356445, "eval_runtime": 3.0364, "eval_samples_per_second": 32.934, "eval_steps_per_second": 4.281, "step": 75 }, { "best_epoch": 2, "best_eval_accuracy": 0.62, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.61, "eval_loss": 0.5318807363510132, "eval_runtime": 3.0643, "eval_samples_per_second": 32.634, "eval_steps_per_second": 4.242, "step": 100 }, { "best_epoch": 2, "best_eval_accuracy": 0.62, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.55, "eval_loss": 0.5817937254905701, "eval_runtime": 3.0801, "eval_samples_per_second": 32.467, "eval_steps_per_second": 4.221, "step": 125 }, { "best_epoch": 2, "best_eval_accuracy": 0.62, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.68, "eval_loss": 0.5093362331390381, "eval_runtime": 3.0872, "eval_samples_per_second": 32.391, "eval_steps_per_second": 4.211, "step": 150 }, { "best_epoch": 5, "best_eval_accuracy": 0.68, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.63, "eval_loss": 0.7841365337371826, "eval_runtime": 3.0946, "eval_samples_per_second": 32.315, "eval_steps_per_second": 4.201, "step": 175 }, { "best_epoch": 5, "best_eval_accuracy": 0.68, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.68, "eval_loss": 0.7628840804100037, "eval_runtime": 3.096, "eval_samples_per_second": 32.3, "eval_steps_per_second": 4.199, "step": 200 }, { "best_epoch": 5, "best_eval_accuracy": 0.68, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.69, "eval_loss": 0.5873813629150391, "eval_runtime": 3.1038, "eval_samples_per_second": 32.218, "eval_steps_per_second": 4.188, "step": 225 }, { "best_epoch": 8, "best_eval_accuracy": 0.69, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.71, "eval_loss": 0.5227637887001038, "eval_runtime": 3.1015, "eval_samples_per_second": 32.242, "eval_steps_per_second": 4.192, "step": 250 }, { "best_epoch": 9, "best_eval_accuracy": 0.71, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.74, "eval_loss": 0.843931257724762, "eval_runtime": 3.1122, "eval_samples_per_second": 32.132, "eval_steps_per_second": 4.177, "step": 275 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.71, "eval_loss": 0.8242893218994141, "eval_runtime": 3.1122, "eval_samples_per_second": 32.131, "eval_steps_per_second": 4.177, "step": 300 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.65, "eval_loss": 0.5669903755187988, "eval_runtime": 3.112, "eval_samples_per_second": 32.134, "eval_steps_per_second": 4.177, "step": 325 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.61, "eval_loss": 0.5600945353507996, "eval_runtime": 3.1143, "eval_samples_per_second": 32.11, "eval_steps_per_second": 4.174, "step": 350 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.64, "eval_loss": 0.6452310085296631, "eval_runtime": 3.1101, "eval_samples_per_second": 32.153, "eval_steps_per_second": 4.18, "step": 375 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.69, "eval_loss": 0.523855984210968, "eval_runtime": 3.1099, "eval_samples_per_second": 32.155, "eval_steps_per_second": 4.18, "step": 400 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.66, "eval_loss": 0.7315166592597961, "eval_runtime": 3.1123, "eval_samples_per_second": 32.13, "eval_steps_per_second": 4.177, "step": 425 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.67, "eval_loss": 0.6651241779327393, "eval_runtime": 3.1136, "eval_samples_per_second": 32.117, "eval_steps_per_second": 4.175, "step": 450 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.72, "eval_loss": 0.9039706587791443, "eval_runtime": 3.1111, "eval_samples_per_second": 32.143, "eval_steps_per_second": 4.179, "step": 475 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.037500000000000006, "loss": 1.3727, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.73, "eval_loss": 0.5786058306694031, "eval_runtime": 3.1123, "eval_samples_per_second": 32.131, "eval_steps_per_second": 4.177, "step": 500 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.69, "eval_loss": 0.7332864999771118, "eval_runtime": 3.1117, "eval_samples_per_second": 32.137, "eval_steps_per_second": 4.178, "step": 525 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.7, "eval_loss": 0.7584354877471924, "eval_runtime": 3.1101, "eval_samples_per_second": 32.154, "eval_steps_per_second": 4.18, "step": 550 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.71, "eval_loss": 0.9900915026664734, "eval_runtime": 3.1096, "eval_samples_per_second": 32.158, "eval_steps_per_second": 4.181, "step": 575 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.7, "eval_loss": 0.5710880160331726, "eval_runtime": 3.1075, "eval_samples_per_second": 32.18, "eval_steps_per_second": 4.183, "step": 600 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.67, "eval_loss": 0.5870327353477478, "eval_runtime": 3.112, "eval_samples_per_second": 32.134, "eval_steps_per_second": 4.177, "step": 625 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.7, "eval_loss": 0.5832374691963196, "eval_runtime": 3.1121, "eval_samples_per_second": 32.132, "eval_steps_per_second": 4.177, "step": 650 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.72, "eval_loss": 0.9776791930198669, "eval_runtime": 3.1127, "eval_samples_per_second": 32.126, "eval_steps_per_second": 4.176, "step": 675 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.71, "eval_loss": 0.6447842121124268, "eval_runtime": 3.1119, "eval_samples_per_second": 32.135, "eval_steps_per_second": 4.178, "step": 700 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.71, "eval_loss": 0.8738675117492676, "eval_runtime": 3.1117, "eval_samples_per_second": 32.136, "eval_steps_per_second": 4.178, "step": 725 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.68, "eval_loss": 0.6709777116775513, "eval_runtime": 3.112, "eval_samples_per_second": 32.134, "eval_steps_per_second": 4.177, "step": 750 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.71, "eval_loss": 0.5918809771537781, "eval_runtime": 3.11, "eval_samples_per_second": 32.154, "eval_steps_per_second": 4.18, "step": 775 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.7, "eval_loss": 0.7616005539894104, "eval_runtime": 3.1114, "eval_samples_per_second": 32.14, "eval_steps_per_second": 4.178, "step": 800 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.72, "eval_loss": 0.5836986303329468, "eval_runtime": 3.1103, "eval_samples_per_second": 32.151, "eval_steps_per_second": 4.18, "step": 825 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.74, "eval_loss": 1.0103265047073364, "eval_runtime": 3.1122, "eval_samples_per_second": 32.131, "eval_steps_per_second": 4.177, "step": 850 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.73, "eval_loss": 0.7007672786712646, "eval_runtime": 3.1105, "eval_samples_per_second": 32.149, "eval_steps_per_second": 4.179, "step": 875 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.72, "eval_loss": 1.016095519065857, "eval_runtime": 3.1114, "eval_samples_per_second": 32.14, "eval_steps_per_second": 4.178, "step": 900 }, { "best_epoch": 10, "best_eval_accuracy": 0.74, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.75, "eval_loss": 0.6910789012908936, "eval_runtime": 3.1125, "eval_samples_per_second": 32.128, "eval_steps_per_second": 4.177, "step": 925 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.75, "eval_loss": 0.6450607776641846, "eval_runtime": 3.1144, "eval_samples_per_second": 32.109, "eval_steps_per_second": 4.174, "step": 950 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.74, "eval_loss": 0.7189579010009766, "eval_runtime": 3.1147, "eval_samples_per_second": 32.106, "eval_steps_per_second": 4.174, "step": 975 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.025, "loss": 0.7534, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.74, "eval_loss": 0.516425609588623, "eval_runtime": 3.1107, "eval_samples_per_second": 32.147, "eval_steps_per_second": 4.179, "step": 1000 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.72, "eval_loss": 0.4994637668132782, "eval_runtime": 3.1112, "eval_samples_per_second": 32.142, "eval_steps_per_second": 4.178, "step": 1025 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.75, "eval_loss": 0.584023654460907, "eval_runtime": 3.1124, "eval_samples_per_second": 32.129, "eval_steps_per_second": 4.177, "step": 1050 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.75, "eval_loss": 0.7394511699676514, "eval_runtime": 3.1125, "eval_samples_per_second": 32.129, "eval_steps_per_second": 4.177, "step": 1075 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.72, "eval_loss": 0.6374080181121826, "eval_runtime": 3.1131, "eval_samples_per_second": 32.122, "eval_steps_per_second": 4.176, "step": 1100 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.73, "eval_loss": 0.7466642260551453, "eval_runtime": 3.1096, "eval_samples_per_second": 32.158, "eval_steps_per_second": 4.181, "step": 1125 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.74, "eval_loss": 0.6875578165054321, "eval_runtime": 3.1103, "eval_samples_per_second": 32.151, "eval_steps_per_second": 4.18, "step": 1150 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.74, "eval_loss": 0.5958933234214783, "eval_runtime": 3.11, "eval_samples_per_second": 32.155, "eval_steps_per_second": 4.18, "step": 1175 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.74, "eval_loss": 0.5625195503234863, "eval_runtime": 3.1122, "eval_samples_per_second": 32.131, "eval_steps_per_second": 4.177, "step": 1200 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.75, "eval_loss": 0.6837201714515686, "eval_runtime": 3.1116, "eval_samples_per_second": 32.137, "eval_steps_per_second": 4.178, "step": 1225 }, { "best_epoch": 36, "best_eval_accuracy": 0.75, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.76, "eval_loss": 0.6766077280044556, "eval_runtime": 3.1132, "eval_samples_per_second": 32.122, "eval_steps_per_second": 4.176, "step": 1250 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.75, "eval_loss": 0.6265960335731506, "eval_runtime": 3.1128, "eval_samples_per_second": 32.125, "eval_steps_per_second": 4.176, "step": 1275 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.74, "eval_loss": 0.6641672253608704, "eval_runtime": 3.1146, "eval_samples_per_second": 32.106, "eval_steps_per_second": 4.174, "step": 1300 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.74, "eval_loss": 0.6202273368835449, "eval_runtime": 3.1106, "eval_samples_per_second": 32.148, "eval_steps_per_second": 4.179, "step": 1325 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.75, "eval_loss": 0.6397892236709595, "eval_runtime": 3.1097, "eval_samples_per_second": 32.157, "eval_steps_per_second": 4.18, "step": 1350 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.75, "eval_loss": 0.6689194440841675, "eval_runtime": 3.1153, "eval_samples_per_second": 32.1, "eval_steps_per_second": 4.173, "step": 1375 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.76, "eval_loss": 0.6628773212432861, "eval_runtime": 3.1124, "eval_samples_per_second": 32.13, "eval_steps_per_second": 4.177, "step": 1400 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.76, "eval_loss": 0.5903107523918152, "eval_runtime": 3.1113, "eval_samples_per_second": 32.141, "eval_steps_per_second": 4.178, "step": 1425 }, { "best_epoch": 49, "best_eval_accuracy": 0.76, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.77, "eval_loss": 0.6132528185844421, "eval_runtime": 3.1127, "eval_samples_per_second": 32.127, "eval_steps_per_second": 4.177, "step": 1450 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.76, "eval_loss": 0.6884872317314148, "eval_runtime": 3.1145, "eval_samples_per_second": 32.108, "eval_steps_per_second": 4.174, "step": 1475 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.0125, "loss": 0.4477, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.76, "eval_loss": 0.5950008630752563, "eval_runtime": 3.1114, "eval_samples_per_second": 32.14, "eval_steps_per_second": 4.178, "step": 1500 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.75, "eval_loss": 0.5714966058731079, "eval_runtime": 3.1122, "eval_samples_per_second": 32.131, "eval_steps_per_second": 4.177, "step": 1525 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.76, "eval_loss": 0.6111324429512024, "eval_runtime": 3.1175, "eval_samples_per_second": 32.077, "eval_steps_per_second": 4.17, "step": 1550 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.76, "eval_loss": 0.6023278832435608, "eval_runtime": 3.113, "eval_samples_per_second": 32.124, "eval_steps_per_second": 4.176, "step": 1575 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.76, "eval_loss": 0.5792566537857056, "eval_runtime": 3.1121, "eval_samples_per_second": 32.133, "eval_steps_per_second": 4.177, "step": 1600 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.74, "eval_loss": 0.5727165341377258, "eval_runtime": 3.1107, "eval_samples_per_second": 32.147, "eval_steps_per_second": 4.179, "step": 1625 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.76, "eval_loss": 0.5606411099433899, "eval_runtime": 3.1095, "eval_samples_per_second": 32.159, "eval_steps_per_second": 4.181, "step": 1650 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.76, "eval_loss": 0.5970374345779419, "eval_runtime": 3.1149, "eval_samples_per_second": 32.103, "eval_steps_per_second": 4.173, "step": 1675 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.76, "eval_loss": 0.5601862072944641, "eval_runtime": 3.1113, "eval_samples_per_second": 32.14, "eval_steps_per_second": 4.178, "step": 1700 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.75, "eval_loss": 0.578113317489624, "eval_runtime": 3.1132, "eval_samples_per_second": 32.121, "eval_steps_per_second": 4.176, "step": 1725 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.76, "eval_loss": 0.6141662001609802, "eval_runtime": 3.1126, "eval_samples_per_second": 32.127, "eval_steps_per_second": 4.177, "step": 1750 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.76, "eval_loss": 0.5757777690887451, "eval_runtime": 3.1115, "eval_samples_per_second": 32.139, "eval_steps_per_second": 4.178, "step": 1775 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.75, "eval_loss": 0.5649742484092712, "eval_runtime": 3.1127, "eval_samples_per_second": 32.127, "eval_steps_per_second": 4.176, "step": 1800 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.76, "eval_loss": 0.5823287963867188, "eval_runtime": 3.112, "eval_samples_per_second": 32.134, "eval_steps_per_second": 4.177, "step": 1825 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.76, "eval_loss": 0.5546900033950806, "eval_runtime": 3.111, "eval_samples_per_second": 32.144, "eval_steps_per_second": 4.179, "step": 1850 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.76, "eval_loss": 0.5636573433876038, "eval_runtime": 3.1109, "eval_samples_per_second": 32.145, "eval_steps_per_second": 4.179, "step": 1875 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.76, "eval_loss": 0.5805944204330444, "eval_runtime": 3.1125, "eval_samples_per_second": 32.129, "eval_steps_per_second": 4.177, "step": 1900 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.76, "eval_loss": 0.5601564645767212, "eval_runtime": 3.1215, "eval_samples_per_second": 32.036, "eval_steps_per_second": 4.165, "step": 1925 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.76, "eval_loss": 0.570804238319397, "eval_runtime": 3.1168, "eval_samples_per_second": 32.084, "eval_steps_per_second": 4.171, "step": 1950 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.76, "eval_loss": 0.5623500347137451, "eval_runtime": 3.11, "eval_samples_per_second": 32.155, "eval_steps_per_second": 4.18, "step": 1975 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.3287, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.76, "eval_loss": 0.5641293525695801, "eval_runtime": 3.1089, "eval_samples_per_second": 32.166, "eval_steps_per_second": 4.182, "step": 2000 }, { "best_epoch": 57, "best_eval_accuracy": 0.77, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.7256226806640625, "train_runtime": 1655.9889, "train_samples_per_second": 19.324, "train_steps_per_second": 1.208 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }