|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 37.0, |
|
"global_step": 999, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2262943855309169e-05, |
|
"loss": 2.3623, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.638755980861244, |
|
"eval_loss": 2.000427007675171, |
|
"eval_runtime": 13.6799, |
|
"eval_samples_per_second": 32.456, |
|
"eval_steps_per_second": 0.146, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.4841962570206113e-05, |
|
"loss": 2.0119, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6597628650271401, |
|
"eval_loss": 1.8219548463821411, |
|
"eval_runtime": 13.3645, |
|
"eval_samples_per_second": 33.222, |
|
"eval_steps_per_second": 0.15, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.6350591807078892e-05, |
|
"loss": 1.8529, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6718626849986864, |
|
"eval_loss": 1.7023260593414307, |
|
"eval_runtime": 13.2825, |
|
"eval_samples_per_second": 33.427, |
|
"eval_steps_per_second": 0.151, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.7420981285103056e-05, |
|
"loss": 1.7397, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6914172365811054, |
|
"eval_loss": 1.5797321796417236, |
|
"eval_runtime": 13.2578, |
|
"eval_samples_per_second": 33.49, |
|
"eval_steps_per_second": 0.151, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.825123986666868e-05, |
|
"loss": 1.6316, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.704878691211332, |
|
"eval_loss": 1.4879175424575806, |
|
"eval_runtime": 13.2974, |
|
"eval_samples_per_second": 33.39, |
|
"eval_steps_per_second": 0.15, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.892961052197583e-05, |
|
"loss": 1.5906, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7069794721407625, |
|
"eval_loss": 1.4811629056930542, |
|
"eval_runtime": 13.1968, |
|
"eval_samples_per_second": 33.644, |
|
"eval_steps_per_second": 0.152, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.9503164738653782e-05, |
|
"loss": 1.5414, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7172597434035343, |
|
"eval_loss": 1.424712061882019, |
|
"eval_runtime": 13.0856, |
|
"eval_samples_per_second": 33.931, |
|
"eval_steps_per_second": 0.153, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 1.4984, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.726310423413698, |
|
"eval_loss": 1.3407562971115112, |
|
"eval_runtime": 13.269, |
|
"eval_samples_per_second": 33.462, |
|
"eval_steps_per_second": 0.151, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4759, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.714683309437463, |
|
"eval_loss": 1.409363865852356, |
|
"eval_runtime": 13.377, |
|
"eval_samples_per_second": 33.191, |
|
"eval_steps_per_second": 0.15, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4544, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7280686239830209, |
|
"eval_loss": 1.338972806930542, |
|
"eval_runtime": 13.2791, |
|
"eval_samples_per_second": 33.436, |
|
"eval_steps_per_second": 0.151, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4112, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7280365296803653, |
|
"eval_loss": 1.3258930444717407, |
|
"eval_runtime": 13.3211, |
|
"eval_samples_per_second": 33.331, |
|
"eval_steps_per_second": 0.15, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3741, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7261693058966325, |
|
"eval_loss": 1.3296260833740234, |
|
"eval_runtime": 13.335, |
|
"eval_samples_per_second": 33.296, |
|
"eval_steps_per_second": 0.15, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.363, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.737372488408037, |
|
"eval_loss": 1.2715643644332886, |
|
"eval_runtime": 13.2444, |
|
"eval_samples_per_second": 33.524, |
|
"eval_steps_per_second": 0.151, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3399, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7395378457059679, |
|
"eval_loss": 1.2554606199264526, |
|
"eval_runtime": 13.3293, |
|
"eval_samples_per_second": 33.31, |
|
"eval_steps_per_second": 0.15, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3276, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7437385280359997, |
|
"eval_loss": 1.1987595558166504, |
|
"eval_runtime": 13.2943, |
|
"eval_samples_per_second": 33.398, |
|
"eval_steps_per_second": 0.15, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.314, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7433329382481925, |
|
"eval_loss": 1.2292665243148804, |
|
"eval_runtime": 13.3081, |
|
"eval_samples_per_second": 33.363, |
|
"eval_steps_per_second": 0.15, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3116, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7434126298865632, |
|
"eval_loss": 1.2456351518630981, |
|
"eval_runtime": 13.0321, |
|
"eval_samples_per_second": 34.07, |
|
"eval_steps_per_second": 0.153, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2997, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.740932796900611, |
|
"eval_loss": 1.2431975603103638, |
|
"eval_runtime": 13.2685, |
|
"eval_samples_per_second": 33.463, |
|
"eval_steps_per_second": 0.151, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2975, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7378796103191155, |
|
"eval_loss": 1.2589606046676636, |
|
"eval_runtime": 13.2957, |
|
"eval_samples_per_second": 33.394, |
|
"eval_steps_per_second": 0.15, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.274, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7467216737881244, |
|
"eval_loss": 1.2022136449813843, |
|
"eval_runtime": 13.2148, |
|
"eval_samples_per_second": 33.599, |
|
"eval_steps_per_second": 0.151, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2447, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7521932389753188, |
|
"eval_loss": 1.1791794300079346, |
|
"eval_runtime": 13.3942, |
|
"eval_samples_per_second": 33.149, |
|
"eval_steps_per_second": 0.149, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2338, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.749752006492921, |
|
"eval_loss": 1.1663883924484253, |
|
"eval_runtime": 13.2721, |
|
"eval_samples_per_second": 33.454, |
|
"eval_steps_per_second": 0.151, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2129, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.757722556143656, |
|
"eval_loss": 1.1311209201812744, |
|
"eval_runtime": 13.3266, |
|
"eval_samples_per_second": 33.317, |
|
"eval_steps_per_second": 0.15, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2233, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7568863020946808, |
|
"eval_loss": 1.140002727508545, |
|
"eval_runtime": 13.2772, |
|
"eval_samples_per_second": 33.441, |
|
"eval_steps_per_second": 0.151, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2072, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7496811963830281, |
|
"eval_loss": 1.1746466159820557, |
|
"eval_runtime": 13.3397, |
|
"eval_samples_per_second": 33.284, |
|
"eval_steps_per_second": 0.15, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2086, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7594424064563463, |
|
"eval_loss": 1.1140285730361938, |
|
"eval_runtime": 13.2221, |
|
"eval_samples_per_second": 33.58, |
|
"eval_steps_per_second": 0.151, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1993, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.746650287118247, |
|
"eval_loss": 1.1945430040359497, |
|
"eval_runtime": 13.2631, |
|
"eval_samples_per_second": 33.476, |
|
"eval_steps_per_second": 0.151, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1866, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7611254579304546, |
|
"eval_loss": 1.116799235343933, |
|
"eval_runtime": 13.2563, |
|
"eval_samples_per_second": 33.494, |
|
"eval_steps_per_second": 0.151, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1658, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7609447828200508, |
|
"eval_loss": 1.0980545282363892, |
|
"eval_runtime": 13.2601, |
|
"eval_samples_per_second": 33.484, |
|
"eval_steps_per_second": 0.151, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1682, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7663264704147248, |
|
"eval_loss": 1.1021370887756348, |
|
"eval_runtime": 13.3382, |
|
"eval_samples_per_second": 33.288, |
|
"eval_steps_per_second": 0.15, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1598, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7555162515584679, |
|
"eval_loss": 1.1446928977966309, |
|
"eval_runtime": 13.2762, |
|
"eval_samples_per_second": 33.443, |
|
"eval_steps_per_second": 0.151, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1586, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7645990292129478, |
|
"eval_loss": 1.1078757047653198, |
|
"eval_runtime": 13.2153, |
|
"eval_samples_per_second": 33.597, |
|
"eval_steps_per_second": 0.151, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1457, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7622519126288946, |
|
"eval_loss": 1.114358901977539, |
|
"eval_runtime": 13.3206, |
|
"eval_samples_per_second": 33.332, |
|
"eval_steps_per_second": 0.15, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1456, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7681910865132537, |
|
"eval_loss": 1.0843584537506104, |
|
"eval_runtime": 13.3243, |
|
"eval_samples_per_second": 33.323, |
|
"eval_steps_per_second": 0.15, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1338, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7738229401452542, |
|
"eval_loss": 1.075467586517334, |
|
"eval_runtime": 13.2446, |
|
"eval_samples_per_second": 33.523, |
|
"eval_steps_per_second": 0.151, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1335, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7666764275256223, |
|
"eval_loss": 1.0833895206451416, |
|
"eval_runtime": 13.2849, |
|
"eval_samples_per_second": 33.421, |
|
"eval_steps_per_second": 0.151, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1268, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7761633253632518, |
|
"eval_loss": 1.0331711769104004, |
|
"eval_runtime": 13.3282, |
|
"eval_samples_per_second": 33.313, |
|
"eval_steps_per_second": 0.15, |
|
"step": 999 |
|
} |
|
], |
|
"max_steps": 1080, |
|
"num_train_epochs": 40, |
|
"total_flos": 118496962805760.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|