{ "best_metric": null, "best_model_checkpoint": null, "epoch": 37.0, "global_step": 999, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.2262943855309169e-05, "loss": 3.3787, "step": 27 }, { "epoch": 1.0, "eval_accuracy": 0.5104135133498819, "eval_loss": 3.0533134937286377, "eval_runtime": 7.7526, "eval_samples_per_second": 57.271, "eval_steps_per_second": 0.258, "step": 27 }, { "epoch": 2.0, "learning_rate": 1.4841962570206113e-05, "loss": 3.0423, "step": 54 }, { "epoch": 2.0, "eval_accuracy": 0.5218076561694905, "eval_loss": 2.9270966053009033, "eval_runtime": 7.1668, "eval_samples_per_second": 61.953, "eval_steps_per_second": 0.279, "step": 54 }, { "epoch": 3.0, "learning_rate": 1.6350591807078892e-05, "loss": 2.8826, "step": 81 }, { "epoch": 3.0, "eval_accuracy": 0.5349013657056145, "eval_loss": 2.826730489730835, "eval_runtime": 7.1613, "eval_samples_per_second": 62.0, "eval_steps_per_second": 0.279, "step": 81 }, { "epoch": 4.0, "learning_rate": 1.7420981285103056e-05, "loss": 2.7528, "step": 108 }, { "epoch": 4.0, "eval_accuracy": 0.5677539663746152, "eval_loss": 2.5704185962677, "eval_runtime": 7.1293, "eval_samples_per_second": 62.279, "eval_steps_per_second": 0.281, "step": 108 }, { "epoch": 5.0, "learning_rate": 1.825123986666868e-05, "loss": 2.676, "step": 135 }, { "epoch": 5.0, "eval_accuracy": 0.5691213903273941, "eval_loss": 2.544525384902954, "eval_runtime": 7.2153, "eval_samples_per_second": 61.536, "eval_steps_per_second": 0.277, "step": 135 }, { "epoch": 6.0, "learning_rate": 1.892961052197583e-05, "loss": 2.6468, "step": 162 }, { "epoch": 6.0, "eval_accuracy": 0.5640251055842328, "eval_loss": 2.552178144454956, "eval_runtime": 7.1679, "eval_samples_per_second": 61.943, "eval_steps_per_second": 0.279, "step": 162 }, { "epoch": 7.0, "learning_rate": 1.9503164738653782e-05, "loss": 2.5425, "step": 189 }, { "epoch": 7.0, "eval_accuracy": 0.592901878914405, "eval_loss": 2.344503879547119, "eval_runtime": 7.1022, "eval_samples_per_second": 62.516, "eval_steps_per_second": 0.282, "step": 189 }, { "epoch": 8.0, "learning_rate": 1.9999999999999998e-05, "loss": 2.4507, "step": 216 }, { "epoch": 8.0, "eval_accuracy": 0.6077949332933593, "eval_loss": 2.190293788909912, "eval_runtime": 7.1171, "eval_samples_per_second": 62.385, "eval_steps_per_second": 0.281, "step": 216 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 2.3779, "step": 243 }, { "epoch": 9.0, "eval_accuracy": 0.6202360876897133, "eval_loss": 2.0489487648010254, "eval_runtime": 7.164, "eval_samples_per_second": 61.977, "eval_steps_per_second": 0.279, "step": 243 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 2.2947, "step": 270 }, { "epoch": 10.0, "eval_accuracy": 0.6456804315147228, "eval_loss": 1.8817191123962402, "eval_runtime": 7.1714, "eval_samples_per_second": 61.912, "eval_steps_per_second": 0.279, "step": 270 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 2.1394, "step": 297 }, { "epoch": 11.0, "eval_accuracy": 0.6489209509025051, "eval_loss": 1.842598795890808, "eval_runtime": 7.136, "eval_samples_per_second": 62.22, "eval_steps_per_second": 0.28, "step": 297 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 2.0426, "step": 324 }, { "epoch": 12.0, "eval_accuracy": 0.6790733111349398, "eval_loss": 1.6428455114364624, "eval_runtime": 7.107, "eval_samples_per_second": 62.474, "eval_steps_per_second": 0.281, "step": 324 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.9533, "step": 351 }, { "epoch": 13.0, "eval_accuracy": 0.690136927023769, "eval_loss": 1.5633041858673096, "eval_runtime": 7.193, "eval_samples_per_second": 61.726, "eval_steps_per_second": 0.278, "step": 351 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 1.8598, "step": 378 }, { "epoch": 14.0, "eval_accuracy": 0.7024347351505412, "eval_loss": 1.4617172479629517, "eval_runtime": 7.1532, "eval_samples_per_second": 62.071, "eval_steps_per_second": 0.28, "step": 378 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 1.7533, "step": 405 }, { "epoch": 15.0, "eval_accuracy": 0.7221761780724604, "eval_loss": 1.3566689491271973, "eval_runtime": 7.1795, "eval_samples_per_second": 61.843, "eval_steps_per_second": 0.279, "step": 405 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.6829, "step": 432 }, { "epoch": 16.0, "eval_accuracy": 0.7226731441436104, "eval_loss": 1.3594402074813843, "eval_runtime": 7.1593, "eval_samples_per_second": 62.017, "eval_steps_per_second": 0.279, "step": 432 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.6363, "step": 459 }, { "epoch": 17.0, "eval_accuracy": 0.7319418805454654, "eval_loss": 1.3049547672271729, "eval_runtime": 7.1172, "eval_samples_per_second": 62.384, "eval_steps_per_second": 0.281, "step": 459 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.5438, "step": 486 }, { "epoch": 18.0, "eval_accuracy": 0.7309124489856713, "eval_loss": 1.3055365085601807, "eval_runtime": 7.1524, "eval_samples_per_second": 62.077, "eval_steps_per_second": 0.28, "step": 486 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.5025, "step": 513 }, { "epoch": 19.0, "eval_accuracy": 0.7245466228759103, "eval_loss": 1.3259419202804565, "eval_runtime": 7.228, "eval_samples_per_second": 61.428, "eval_steps_per_second": 0.277, "step": 513 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.4319, "step": 540 }, { "epoch": 20.0, "eval_accuracy": 0.7438057920631648, "eval_loss": 1.2239311933517456, "eval_runtime": 7.1524, "eval_samples_per_second": 62.077, "eval_steps_per_second": 0.28, "step": 540 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 1.3768, "step": 567 }, { "epoch": 21.0, "eval_accuracy": 0.7472951634598515, "eval_loss": 1.1993966102600098, "eval_runtime": 7.1565, "eval_samples_per_second": 62.042, "eval_steps_per_second": 0.279, "step": 567 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 1.3384, "step": 594 }, { "epoch": 22.0, "eval_accuracy": 0.7496693916806925, "eval_loss": 1.1782174110412598, "eval_runtime": 7.2171, "eval_samples_per_second": 61.52, "eval_steps_per_second": 0.277, "step": 594 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 1.308, "step": 621 }, { "epoch": 23.0, "eval_accuracy": 0.7524660728164047, "eval_loss": 1.1727790832519531, "eval_runtime": 7.1544, "eval_samples_per_second": 62.06, "eval_steps_per_second": 0.28, "step": 621 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 1.3139, "step": 648 }, { "epoch": 24.0, "eval_accuracy": 0.7564876470222167, "eval_loss": 1.1401317119598389, "eval_runtime": 6.2932, "eval_samples_per_second": 70.552, "eval_steps_per_second": 0.318, "step": 648 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 1.2701, "step": 675 }, { "epoch": 25.0, "eval_accuracy": 0.7505940307157346, "eval_loss": 1.1718164682388306, "eval_runtime": 7.2194, "eval_samples_per_second": 61.501, "eval_steps_per_second": 0.277, "step": 675 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 1.2614, "step": 702 }, { "epoch": 26.0, "eval_accuracy": 0.7606208191526816, "eval_loss": 1.111540675163269, "eval_runtime": 7.1423, "eval_samples_per_second": 62.165, "eval_steps_per_second": 0.28, "step": 702 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 1.2549, "step": 729 }, { "epoch": 27.0, "eval_accuracy": 0.7517491504126567, "eval_loss": 1.1640406847000122, "eval_runtime": 7.113, "eval_samples_per_second": 62.421, "eval_steps_per_second": 0.281, "step": 729 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 1.2287, "step": 756 }, { "epoch": 28.0, "eval_accuracy": 0.7552987090963674, "eval_loss": 1.1474734544754028, "eval_runtime": 7.171, "eval_samples_per_second": 61.916, "eval_steps_per_second": 0.279, "step": 756 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 1.1967, "step": 783 }, { "epoch": 29.0, "eval_accuracy": 0.7647311090144953, "eval_loss": 1.0948566198349, "eval_runtime": 7.1733, "eval_samples_per_second": 61.896, "eval_steps_per_second": 0.279, "step": 783 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 1.1938, "step": 810 }, { "epoch": 30.0, "eval_accuracy": 0.7628133294013565, "eval_loss": 1.104235291481018, "eval_runtime": 7.2106, "eval_samples_per_second": 61.576, "eval_steps_per_second": 0.277, "step": 810 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 1.1831, "step": 837 }, { "epoch": 31.0, "eval_accuracy": 0.7568993506493507, "eval_loss": 1.1557021141052246, "eval_runtime": 7.147, "eval_samples_per_second": 62.124, "eval_steps_per_second": 0.28, "step": 837 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 1.1783, "step": 864 }, { "epoch": 32.0, "eval_accuracy": 0.7663818512012861, "eval_loss": 1.0878251791000366, "eval_runtime": 7.2016, "eval_samples_per_second": 61.653, "eval_steps_per_second": 0.278, "step": 864 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 1.1571, "step": 891 }, { "epoch": 33.0, "eval_accuracy": 0.7626662971175167, "eval_loss": 1.1019645929336548, "eval_runtime": 7.115, "eval_samples_per_second": 62.403, "eval_steps_per_second": 0.281, "step": 891 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 1.1511, "step": 918 }, { "epoch": 34.0, "eval_accuracy": 0.7707477642809286, "eval_loss": 1.0570372343063354, "eval_runtime": 7.1537, "eval_samples_per_second": 62.066, "eval_steps_per_second": 0.28, "step": 918 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 1.1332, "step": 945 }, { "epoch": 35.0, "eval_accuracy": 0.7779759669545625, "eval_loss": 1.0286684036254883, "eval_runtime": 7.1266, "eval_samples_per_second": 62.301, "eval_steps_per_second": 0.281, "step": 945 }, { "epoch": 36.0, "learning_rate": 2e-05, "loss": 1.1343, "step": 972 }, { "epoch": 36.0, "eval_accuracy": 0.7709718033554885, "eval_loss": 1.0590564012527466, "eval_runtime": 7.1932, "eval_samples_per_second": 61.725, "eval_steps_per_second": 0.278, "step": 972 }, { "epoch": 37.0, "learning_rate": 2e-05, "loss": 1.1164, "step": 999 }, { "epoch": 37.0, "eval_accuracy": 0.7806313208703647, "eval_loss": 1.0081170797348022, "eval_runtime": 7.1199, "eval_samples_per_second": 62.36, "eval_steps_per_second": 0.281, "step": 999 } ], "max_steps": 1080, "num_train_epochs": 40, "total_flos": 118496962805760.0, "trial_name": null, "trial_params": null }