{ "best_metric": null, "best_model_checkpoint": null, "epoch": 37.0, "global_step": 999, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.2262943855309169e-05, "loss": 2.3623, "step": 27 }, { "epoch": 1.0, "eval_accuracy": 0.638755980861244, "eval_loss": 2.000427007675171, "eval_runtime": 13.6799, "eval_samples_per_second": 32.456, "eval_steps_per_second": 0.146, "step": 27 }, { "epoch": 2.0, "learning_rate": 1.4841962570206113e-05, "loss": 2.0119, "step": 54 }, { "epoch": 2.0, "eval_accuracy": 0.6597628650271401, "eval_loss": 1.8219548463821411, "eval_runtime": 13.3645, "eval_samples_per_second": 33.222, "eval_steps_per_second": 0.15, "step": 54 }, { "epoch": 3.0, "learning_rate": 1.6350591807078892e-05, "loss": 1.8529, "step": 81 }, { "epoch": 3.0, "eval_accuracy": 0.6718626849986864, "eval_loss": 1.7023260593414307, "eval_runtime": 13.2825, "eval_samples_per_second": 33.427, "eval_steps_per_second": 0.151, "step": 81 }, { "epoch": 4.0, "learning_rate": 1.7420981285103056e-05, "loss": 1.7397, "step": 108 }, { "epoch": 4.0, "eval_accuracy": 0.6914172365811054, "eval_loss": 1.5797321796417236, "eval_runtime": 13.2578, "eval_samples_per_second": 33.49, "eval_steps_per_second": 0.151, "step": 108 }, { "epoch": 5.0, "learning_rate": 1.825123986666868e-05, "loss": 1.6316, "step": 135 }, { "epoch": 5.0, "eval_accuracy": 0.704878691211332, "eval_loss": 1.4879175424575806, "eval_runtime": 13.2974, "eval_samples_per_second": 33.39, "eval_steps_per_second": 0.15, "step": 135 }, { "epoch": 6.0, "learning_rate": 1.892961052197583e-05, "loss": 1.5906, "step": 162 }, { "epoch": 6.0, "eval_accuracy": 0.7069794721407625, "eval_loss": 1.4811629056930542, "eval_runtime": 13.1968, "eval_samples_per_second": 33.644, "eval_steps_per_second": 0.152, "step": 162 }, { "epoch": 7.0, "learning_rate": 1.9503164738653782e-05, "loss": 1.5414, "step": 189 }, { "epoch": 7.0, "eval_accuracy": 0.7172597434035343, "eval_loss": 1.424712061882019, "eval_runtime": 13.0856, "eval_samples_per_second": 33.931, "eval_steps_per_second": 0.153, "step": 189 }, { "epoch": 8.0, "learning_rate": 1.9999999999999998e-05, "loss": 1.4984, "step": 216 }, { "epoch": 8.0, "eval_accuracy": 0.726310423413698, "eval_loss": 1.3407562971115112, "eval_runtime": 13.269, "eval_samples_per_second": 33.462, "eval_steps_per_second": 0.151, "step": 216 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.4759, "step": 243 }, { "epoch": 9.0, "eval_accuracy": 0.714683309437463, "eval_loss": 1.409363865852356, "eval_runtime": 13.377, "eval_samples_per_second": 33.191, "eval_steps_per_second": 0.15, "step": 243 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 1.4544, "step": 270 }, { "epoch": 10.0, "eval_accuracy": 0.7280686239830209, "eval_loss": 1.338972806930542, "eval_runtime": 13.2791, "eval_samples_per_second": 33.436, "eval_steps_per_second": 0.151, "step": 270 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 1.4112, "step": 297 }, { "epoch": 11.0, "eval_accuracy": 0.7280365296803653, "eval_loss": 1.3258930444717407, "eval_runtime": 13.3211, "eval_samples_per_second": 33.331, "eval_steps_per_second": 0.15, "step": 297 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 1.3741, "step": 324 }, { "epoch": 12.0, "eval_accuracy": 0.7261693058966325, "eval_loss": 1.3296260833740234, "eval_runtime": 13.335, "eval_samples_per_second": 33.296, "eval_steps_per_second": 0.15, "step": 324 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.363, "step": 351 }, { "epoch": 13.0, "eval_accuracy": 0.737372488408037, "eval_loss": 1.2715643644332886, "eval_runtime": 13.2444, "eval_samples_per_second": 33.524, "eval_steps_per_second": 0.151, "step": 351 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 1.3399, "step": 378 }, { "epoch": 14.0, "eval_accuracy": 0.7395378457059679, "eval_loss": 1.2554606199264526, "eval_runtime": 13.3293, "eval_samples_per_second": 33.31, "eval_steps_per_second": 0.15, "step": 378 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 1.3276, "step": 405 }, { "epoch": 15.0, "eval_accuracy": 0.7437385280359997, "eval_loss": 1.1987595558166504, "eval_runtime": 13.2943, "eval_samples_per_second": 33.398, "eval_steps_per_second": 0.15, "step": 405 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.314, "step": 432 }, { "epoch": 16.0, "eval_accuracy": 0.7433329382481925, "eval_loss": 1.2292665243148804, "eval_runtime": 13.3081, "eval_samples_per_second": 33.363, "eval_steps_per_second": 0.15, "step": 432 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.3116, "step": 459 }, { "epoch": 17.0, "eval_accuracy": 0.7434126298865632, "eval_loss": 1.2456351518630981, "eval_runtime": 13.0321, "eval_samples_per_second": 34.07, "eval_steps_per_second": 0.153, "step": 459 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.2997, "step": 486 }, { "epoch": 18.0, "eval_accuracy": 0.740932796900611, "eval_loss": 1.2431975603103638, "eval_runtime": 13.2685, "eval_samples_per_second": 33.463, "eval_steps_per_second": 0.151, "step": 486 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.2975, "step": 513 }, { "epoch": 19.0, "eval_accuracy": 0.7378796103191155, "eval_loss": 1.2589606046676636, "eval_runtime": 13.2957, "eval_samples_per_second": 33.394, "eval_steps_per_second": 0.15, "step": 513 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.274, "step": 540 }, { "epoch": 20.0, "eval_accuracy": 0.7467216737881244, "eval_loss": 1.2022136449813843, "eval_runtime": 13.2148, "eval_samples_per_second": 33.599, "eval_steps_per_second": 0.151, "step": 540 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 1.2447, "step": 567 }, { "epoch": 21.0, "eval_accuracy": 0.7521932389753188, "eval_loss": 1.1791794300079346, "eval_runtime": 13.3942, "eval_samples_per_second": 33.149, "eval_steps_per_second": 0.149, "step": 567 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 1.2338, "step": 594 }, { "epoch": 22.0, "eval_accuracy": 0.749752006492921, "eval_loss": 1.1663883924484253, "eval_runtime": 13.2721, "eval_samples_per_second": 33.454, "eval_steps_per_second": 0.151, "step": 594 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 1.2129, "step": 621 }, { "epoch": 23.0, "eval_accuracy": 0.757722556143656, "eval_loss": 1.1311209201812744, "eval_runtime": 13.3266, "eval_samples_per_second": 33.317, "eval_steps_per_second": 0.15, "step": 621 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 1.2233, "step": 648 }, { "epoch": 24.0, "eval_accuracy": 0.7568863020946808, "eval_loss": 1.140002727508545, "eval_runtime": 13.2772, "eval_samples_per_second": 33.441, "eval_steps_per_second": 0.151, "step": 648 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 1.2072, "step": 675 }, { "epoch": 25.0, "eval_accuracy": 0.7496811963830281, "eval_loss": 1.1746466159820557, "eval_runtime": 13.3397, "eval_samples_per_second": 33.284, "eval_steps_per_second": 0.15, "step": 675 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 1.2086, "step": 702 }, { "epoch": 26.0, "eval_accuracy": 0.7594424064563463, "eval_loss": 1.1140285730361938, "eval_runtime": 13.2221, "eval_samples_per_second": 33.58, "eval_steps_per_second": 0.151, "step": 702 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 1.1993, "step": 729 }, { "epoch": 27.0, "eval_accuracy": 0.746650287118247, "eval_loss": 1.1945430040359497, "eval_runtime": 13.2631, "eval_samples_per_second": 33.476, "eval_steps_per_second": 0.151, "step": 729 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 1.1866, "step": 756 }, { "epoch": 28.0, "eval_accuracy": 0.7611254579304546, "eval_loss": 1.116799235343933, "eval_runtime": 13.2563, "eval_samples_per_second": 33.494, "eval_steps_per_second": 0.151, "step": 756 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 1.1658, "step": 783 }, { "epoch": 29.0, "eval_accuracy": 0.7609447828200508, "eval_loss": 1.0980545282363892, "eval_runtime": 13.2601, "eval_samples_per_second": 33.484, "eval_steps_per_second": 0.151, "step": 783 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 1.1682, "step": 810 }, { "epoch": 30.0, "eval_accuracy": 0.7663264704147248, "eval_loss": 1.1021370887756348, "eval_runtime": 13.3382, "eval_samples_per_second": 33.288, "eval_steps_per_second": 0.15, "step": 810 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 1.1598, "step": 837 }, { "epoch": 31.0, "eval_accuracy": 0.7555162515584679, "eval_loss": 1.1446928977966309, "eval_runtime": 13.2762, "eval_samples_per_second": 33.443, "eval_steps_per_second": 0.151, "step": 837 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 1.1586, "step": 864 }, { "epoch": 32.0, "eval_accuracy": 0.7645990292129478, "eval_loss": 1.1078757047653198, "eval_runtime": 13.2153, "eval_samples_per_second": 33.597, "eval_steps_per_second": 0.151, "step": 864 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 1.1457, "step": 891 }, { "epoch": 33.0, "eval_accuracy": 0.7622519126288946, "eval_loss": 1.114358901977539, "eval_runtime": 13.3206, "eval_samples_per_second": 33.332, "eval_steps_per_second": 0.15, "step": 891 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 1.1456, "step": 918 }, { "epoch": 34.0, "eval_accuracy": 0.7681910865132537, "eval_loss": 1.0843584537506104, "eval_runtime": 13.3243, "eval_samples_per_second": 33.323, "eval_steps_per_second": 0.15, "step": 918 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 1.1338, "step": 945 }, { "epoch": 35.0, "eval_accuracy": 0.7738229401452542, "eval_loss": 1.075467586517334, "eval_runtime": 13.2446, "eval_samples_per_second": 33.523, "eval_steps_per_second": 0.151, "step": 945 }, { "epoch": 36.0, "learning_rate": 2e-05, "loss": 1.1335, "step": 972 }, { "epoch": 36.0, "eval_accuracy": 0.7666764275256223, "eval_loss": 1.0833895206451416, "eval_runtime": 13.2849, "eval_samples_per_second": 33.421, "eval_steps_per_second": 0.151, "step": 972 }, { "epoch": 37.0, "learning_rate": 2e-05, "loss": 1.1268, "step": 999 }, { "epoch": 37.0, "eval_accuracy": 0.7761633253632518, "eval_loss": 1.0331711769104004, "eval_runtime": 13.3282, "eval_samples_per_second": 33.313, "eval_steps_per_second": 0.15, "step": 999 } ], "max_steps": 1080, "num_train_epochs": 40, "total_flos": 118496962805760.0, "trial_name": null, "trial_params": null }