{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "global_step": 3180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.3832186408159307e-05, "loss": 2.8049, "step": 106 }, { "epoch": 1.0, "eval_accuracy": 0.5764944934955999, "eval_loss": 2.5287580490112305, "eval_runtime": 22.0895, "eval_samples_per_second": 80.128, "eval_steps_per_second": 0.272, "step": 106 }, { "epoch": 2.0, "learning_rate": 1.5888124272106204e-05, "loss": 2.4814, "step": 212 }, { "epoch": 2.0, "eval_accuracy": 0.5855989573905803, "eval_loss": 2.429943561553955, "eval_runtime": 20.8189, "eval_samples_per_second": 85.019, "eval_steps_per_second": 0.288, "step": 212 }, { "epoch": 3.0, "learning_rate": 1.7090770826327895e-05, "loss": 2.3778, "step": 318 }, { "epoch": 3.0, "eval_accuracy": 0.5945957840073934, "eval_loss": 2.348792314529419, "eval_runtime": 20.8031, "eval_samples_per_second": 85.084, "eval_steps_per_second": 0.288, "step": 318 }, { "epoch": 4.0, "learning_rate": 1.7944062136053104e-05, "loss": 2.307, "step": 424 }, { "epoch": 4.0, "eval_accuracy": 0.6146158043850404, "eval_loss": 2.1809699535369873, "eval_runtime": 20.6813, "eval_samples_per_second": 85.585, "eval_steps_per_second": 0.29, "step": 424 }, { "epoch": 5.0, "learning_rate": 1.860592629580032e-05, "loss": 2.0807, "step": 530 }, { "epoch": 5.0, "eval_accuracy": 0.7058374199856394, "eval_loss": 1.4772239923477173, "eval_runtime": 20.7293, "eval_samples_per_second": 85.386, "eval_steps_per_second": 0.289, "step": 530 }, { "epoch": 6.0, "learning_rate": 1.9146708690274792e-05, "loss": 1.6063, "step": 636 }, { "epoch": 6.0, "eval_accuracy": 0.7498181077384244, "eval_loss": 1.183250069618225, "eval_runtime": 20.8379, "eval_samples_per_second": 84.941, "eval_steps_per_second": 0.288, "step": 636 }, { "epoch": 7.0, "learning_rate": 1.9603933689955228e-05, "loss": 1.3122, "step": 742 }, { "epoch": 7.0, "eval_accuracy": 0.7694247060500784, "eval_loss": 1.0655418634414673, "eval_runtime": 20.7257, "eval_samples_per_second": 85.401, "eval_steps_per_second": 0.289, "step": 742 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 1.1911, "step": 848 }, { "epoch": 8.0, "eval_accuracy": 0.7711616016924874, "eval_loss": 1.059058427810669, "eval_runtime": 20.8383, "eval_samples_per_second": 84.94, "eval_steps_per_second": 0.288, "step": 848 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.1273, "step": 954 }, { "epoch": 9.0, "eval_accuracy": 0.7756633909375628, "eval_loss": 1.020793080329895, "eval_runtime": 20.7787, "eval_samples_per_second": 85.183, "eval_steps_per_second": 0.289, "step": 954 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 1.0954, "step": 1060 }, { "epoch": 10.0, "eval_accuracy": 0.7866386428464195, "eval_loss": 0.9602928161621094, "eval_runtime": 20.6664, "eval_samples_per_second": 85.646, "eval_steps_per_second": 0.29, "step": 1060 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 1.0565, "step": 1166 }, { "epoch": 11.0, "eval_accuracy": 0.7900944085581504, "eval_loss": 0.9395522475242615, "eval_runtime": 20.7654, "eval_samples_per_second": 85.238, "eval_steps_per_second": 0.289, "step": 1166 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 1.0351, "step": 1272 }, { "epoch": 12.0, "eval_accuracy": 0.7892073832790445, "eval_loss": 0.9436615705490112, "eval_runtime": 20.1273, "eval_samples_per_second": 87.94, "eval_steps_per_second": 0.298, "step": 1272 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.0136, "step": 1378 }, { "epoch": 13.0, "eval_accuracy": 0.7930297660705511, "eval_loss": 0.9114692211151123, "eval_runtime": 20.7577, "eval_samples_per_second": 85.269, "eval_steps_per_second": 0.289, "step": 1378 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 0.996, "step": 1484 }, { "epoch": 14.0, "eval_accuracy": 0.7986084994358782, "eval_loss": 0.9008192420005798, "eval_runtime": 20.8068, "eval_samples_per_second": 85.068, "eval_steps_per_second": 0.288, "step": 1484 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 0.985, "step": 1590 }, { "epoch": 15.0, "eval_accuracy": 0.8006165209970277, "eval_loss": 0.8874076008796692, "eval_runtime": 20.7048, "eval_samples_per_second": 85.487, "eval_steps_per_second": 0.29, "step": 1590 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 0.9654, "step": 1696 }, { "epoch": 16.0, "eval_accuracy": 0.8016079609686441, "eval_loss": 0.8684276938438416, "eval_runtime": 20.7798, "eval_samples_per_second": 85.179, "eval_steps_per_second": 0.289, "step": 1696 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 0.9529, "step": 1802 }, { "epoch": 17.0, "eval_accuracy": 0.7997229383352594, "eval_loss": 0.8870733976364136, "eval_runtime": 20.7541, "eval_samples_per_second": 85.284, "eval_steps_per_second": 0.289, "step": 1802 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 0.9442, "step": 1908 }, { "epoch": 18.0, "eval_accuracy": 0.8027364602986559, "eval_loss": 0.8661928772926331, "eval_runtime": 20.7884, "eval_samples_per_second": 85.144, "eval_steps_per_second": 0.289, "step": 1908 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 0.9327, "step": 2014 }, { "epoch": 19.0, "eval_accuracy": 0.8069067033935976, "eval_loss": 0.8454752564430237, "eval_runtime": 20.7342, "eval_samples_per_second": 85.366, "eval_steps_per_second": 0.289, "step": 2014 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 0.9204, "step": 2120 }, { "epoch": 20.0, "eval_accuracy": 0.8092386081748497, "eval_loss": 0.8449112772941589, "eval_runtime": 20.6952, "eval_samples_per_second": 85.527, "eval_steps_per_second": 0.29, "step": 2120 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 0.9097, "step": 2226 }, { "epoch": 21.0, "eval_accuracy": 0.809981731823164, "eval_loss": 0.8322621583938599, "eval_runtime": 20.7947, "eval_samples_per_second": 85.118, "eval_steps_per_second": 0.289, "step": 2226 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 0.9024, "step": 2332 }, { "epoch": 22.0, "eval_accuracy": 0.8133925596890617, "eval_loss": 0.8194364905357361, "eval_runtime": 20.8718, "eval_samples_per_second": 84.803, "eval_steps_per_second": 0.287, "step": 2332 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 0.895, "step": 2438 }, { "epoch": 23.0, "eval_accuracy": 0.8129690134121053, "eval_loss": 0.8122667670249939, "eval_runtime": 20.8197, "eval_samples_per_second": 85.016, "eval_steps_per_second": 0.288, "step": 2438 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 0.891, "step": 2544 }, { "epoch": 24.0, "eval_accuracy": 0.8167250838730983, "eval_loss": 0.7972639799118042, "eval_runtime": 20.743, "eval_samples_per_second": 85.33, "eval_steps_per_second": 0.289, "step": 2544 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 0.8784, "step": 2650 }, { "epoch": 25.0, "eval_accuracy": 0.8133820647746538, "eval_loss": 0.8083846569061279, "eval_runtime": 20.8823, "eval_samples_per_second": 84.761, "eval_steps_per_second": 0.287, "step": 2650 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 0.8771, "step": 2756 }, { "epoch": 26.0, "eval_accuracy": 0.8162191337201458, "eval_loss": 0.8039098381996155, "eval_runtime": 20.8754, "eval_samples_per_second": 84.789, "eval_steps_per_second": 0.287, "step": 2756 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 0.8713, "step": 2862 }, { "epoch": 27.0, "eval_accuracy": 0.8169723253335294, "eval_loss": 0.7862613797187805, "eval_runtime": 20.8372, "eval_samples_per_second": 84.944, "eval_steps_per_second": 0.288, "step": 2862 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 0.8707, "step": 2968 }, { "epoch": 28.0, "eval_accuracy": 0.8173417825447652, "eval_loss": 0.7960302829742432, "eval_runtime": 20.7882, "eval_samples_per_second": 85.145, "eval_steps_per_second": 0.289, "step": 2968 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 0.8571, "step": 3074 }, { "epoch": 29.0, "eval_accuracy": 0.8181885887496183, "eval_loss": 0.7875123023986816, "eval_runtime": 20.7114, "eval_samples_per_second": 85.46, "eval_steps_per_second": 0.29, "step": 3074 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 0.8593, "step": 3180 }, { "epoch": 30.0, "eval_accuracy": 0.8199450484519026, "eval_loss": 0.7765340805053711, "eval_runtime": 20.9094, "eval_samples_per_second": 84.651, "eval_steps_per_second": 0.287, "step": 3180 } ], "max_steps": 4240, "num_train_epochs": 40, "total_flos": 1006048479543296.0, "trial_name": null, "trial_params": null }