{ "best_metric": null, "best_model_checkpoint": null, "epoch": 37.0, "global_step": 3922, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.3832186408159307e-05, "loss": 3.0358, "step": 106 }, { "epoch": 1.0, "eval_accuracy": 0.543347000166316, "eval_loss": 2.7585761547088623, "eval_runtime": 25.1028, "eval_samples_per_second": 70.51, "eval_steps_per_second": 0.239, "step": 106 }, { "epoch": 2.0, "learning_rate": 1.5888124272106204e-05, "loss": 2.7114, "step": 212 }, { "epoch": 2.0, "eval_accuracy": 0.55558049451544, "eval_loss": 2.6144614219665527, "eval_runtime": 24.4673, "eval_samples_per_second": 72.341, "eval_steps_per_second": 0.245, "step": 212 }, { "epoch": 3.0, "learning_rate": 1.7090770826327895e-05, "loss": 2.4868, "step": 318 }, { "epoch": 3.0, "eval_accuracy": 0.6212061200839091, "eval_loss": 2.034228563308716, "eval_runtime": 24.2933, "eval_samples_per_second": 72.86, "eval_steps_per_second": 0.247, "step": 318 }, { "epoch": 4.0, "learning_rate": 1.7944062136053104e-05, "loss": 2.1388, "step": 424 }, { "epoch": 4.0, "eval_accuracy": 0.6854297952501626, "eval_loss": 1.5866005420684814, "eval_runtime": 24.3894, "eval_samples_per_second": 72.572, "eval_steps_per_second": 0.246, "step": 424 }, { "epoch": 5.0, "learning_rate": 1.860592629580032e-05, "loss": 1.8311, "step": 530 }, { "epoch": 5.0, "eval_accuracy": 0.7191362268359381, "eval_loss": 1.3652117252349854, "eval_runtime": 24.3545, "eval_samples_per_second": 72.677, "eval_steps_per_second": 0.246, "step": 530 }, { "epoch": 6.0, "learning_rate": 1.9146708690274792e-05, "loss": 1.5704, "step": 636 }, { "epoch": 6.0, "eval_accuracy": 0.7248188353074707, "eval_loss": 1.3337750434875488, "eval_runtime": 24.3217, "eval_samples_per_second": 72.775, "eval_steps_per_second": 0.247, "step": 636 }, { "epoch": 7.0, "learning_rate": 1.9603933689955228e-05, "loss": 1.4498, "step": 742 }, { "epoch": 7.0, "eval_accuracy": 0.7381350781490347, "eval_loss": 1.2443161010742188, "eval_runtime": 24.5188, "eval_samples_per_second": 72.189, "eval_steps_per_second": 0.245, "step": 742 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 1.3744, "step": 848 }, { "epoch": 8.0, "eval_accuracy": 0.7413300423856433, "eval_loss": 1.2314091920852661, "eval_runtime": 24.3764, "eval_samples_per_second": 72.611, "eval_steps_per_second": 0.246, "step": 848 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.3207, "step": 954 }, { "epoch": 9.0, "eval_accuracy": 0.7439575072718211, "eval_loss": 1.2015495300292969, "eval_runtime": 24.5167, "eval_samples_per_second": 72.196, "eval_steps_per_second": 0.245, "step": 954 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 1.2892, "step": 1060 }, { "epoch": 10.0, "eval_accuracy": 0.7558549767302207, "eval_loss": 1.1335448026657104, "eval_runtime": 24.3508, "eval_samples_per_second": 72.688, "eval_steps_per_second": 0.246, "step": 1060 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 1.25, "step": 1166 }, { "epoch": 11.0, "eval_accuracy": 0.7594773916347587, "eval_loss": 1.1179267168045044, "eval_runtime": 24.3594, "eval_samples_per_second": 72.662, "eval_steps_per_second": 0.246, "step": 1166 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 1.2274, "step": 1272 }, { "epoch": 12.0, "eval_accuracy": 0.7580528411147304, "eval_loss": 1.1233829259872437, "eval_runtime": 24.3279, "eval_samples_per_second": 72.756, "eval_steps_per_second": 0.247, "step": 1272 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.2027, "step": 1378 }, { "epoch": 13.0, "eval_accuracy": 0.7637997528180235, "eval_loss": 1.0827549695968628, "eval_runtime": 24.4052, "eval_samples_per_second": 72.525, "eval_steps_per_second": 0.246, "step": 1378 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 1.1838, "step": 1484 }, { "epoch": 14.0, "eval_accuracy": 0.7666054329273585, "eval_loss": 1.0769394636154175, "eval_runtime": 24.4017, "eval_samples_per_second": 72.536, "eval_steps_per_second": 0.246, "step": 1484 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 1.1736, "step": 1590 }, { "epoch": 15.0, "eval_accuracy": 0.7698640416703452, "eval_loss": 1.0607359409332275, "eval_runtime": 24.3587, "eval_samples_per_second": 72.664, "eval_steps_per_second": 0.246, "step": 1590 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.1534, "step": 1696 }, { "epoch": 16.0, "eval_accuracy": 0.7714122091985482, "eval_loss": 1.0373061895370483, "eval_runtime": 24.4342, "eval_samples_per_second": 72.439, "eval_steps_per_second": 0.246, "step": 1696 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.1388, "step": 1802 }, { "epoch": 17.0, "eval_accuracy": 0.7687794470619611, "eval_loss": 1.0571210384368896, "eval_runtime": 24.3831, "eval_samples_per_second": 72.591, "eval_steps_per_second": 0.246, "step": 1802 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.1273, "step": 1908 }, { "epoch": 18.0, "eval_accuracy": 0.7713278633378673, "eval_loss": 1.0348763465881348, "eval_runtime": 24.3437, "eval_samples_per_second": 72.709, "eval_steps_per_second": 0.246, "step": 1908 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.1161, "step": 2014 }, { "epoch": 19.0, "eval_accuracy": 0.777816642254917, "eval_loss": 1.0058482885360718, "eval_runtime": 24.086, "eval_samples_per_second": 73.487, "eval_steps_per_second": 0.249, "step": 2014 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.1037, "step": 2120 }, { "epoch": 20.0, "eval_accuracy": 0.778376203991329, "eval_loss": 1.0106089115142822, "eval_runtime": 24.3319, "eval_samples_per_second": 72.744, "eval_steps_per_second": 0.247, "step": 2120 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 1.0897, "step": 2226 }, { "epoch": 21.0, "eval_accuracy": 0.7792838874680307, "eval_loss": 0.9971184730529785, "eval_runtime": 24.2829, "eval_samples_per_second": 72.891, "eval_steps_per_second": 0.247, "step": 2226 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 1.0833, "step": 2332 }, { "epoch": 22.0, "eval_accuracy": 0.7836239126411253, "eval_loss": 0.9799665212631226, "eval_runtime": 24.3154, "eval_samples_per_second": 72.793, "eval_steps_per_second": 0.247, "step": 2332 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 1.0747, "step": 2438 }, { "epoch": 23.0, "eval_accuracy": 0.7836160467857196, "eval_loss": 0.9719156622886658, "eval_runtime": 24.3874, "eval_samples_per_second": 72.578, "eval_steps_per_second": 0.246, "step": 2438 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 1.0708, "step": 2544 }, { "epoch": 24.0, "eval_accuracy": 0.7880468638591382, "eval_loss": 0.9512822031974792, "eval_runtime": 26.1688, "eval_samples_per_second": 67.638, "eval_steps_per_second": 0.229, "step": 2544 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 1.0577, "step": 2650 }, { "epoch": 25.0, "eval_accuracy": 0.7839817403876498, "eval_loss": 0.9685712456703186, "eval_runtime": 24.2295, "eval_samples_per_second": 73.052, "eval_steps_per_second": 0.248, "step": 2650 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 1.0551, "step": 2756 }, { "epoch": 26.0, "eval_accuracy": 0.7858663548179133, "eval_loss": 0.9629907608032227, "eval_runtime": 24.4249, "eval_samples_per_second": 72.467, "eval_steps_per_second": 0.246, "step": 2756 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 1.0489, "step": 2862 }, { "epoch": 27.0, "eval_accuracy": 0.7881803814914183, "eval_loss": 0.9433470964431763, "eval_runtime": 24.4582, "eval_samples_per_second": 72.368, "eval_steps_per_second": 0.245, "step": 2862 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 1.0483, "step": 2968 }, { "epoch": 28.0, "eval_accuracy": 0.7872097449562239, "eval_loss": 0.9574456214904785, "eval_runtime": 24.3523, "eval_samples_per_second": 72.683, "eval_steps_per_second": 0.246, "step": 2968 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 1.0327, "step": 3074 }, { "epoch": 29.0, "eval_accuracy": 0.7909898637829464, "eval_loss": 0.9401029348373413, "eval_runtime": 24.3913, "eval_samples_per_second": 72.567, "eval_steps_per_second": 0.246, "step": 3074 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 1.0362, "step": 3180 }, { "epoch": 30.0, "eval_accuracy": 0.7919005554242496, "eval_loss": 0.9270448684692383, "eval_runtime": 24.4335, "eval_samples_per_second": 72.441, "eval_steps_per_second": 0.246, "step": 3180 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 1.0243, "step": 3286 }, { "epoch": 31.0, "eval_accuracy": 0.7986601387439376, "eval_loss": 0.8908094167709351, "eval_runtime": 24.1448, "eval_samples_per_second": 73.308, "eval_steps_per_second": 0.249, "step": 3286 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 1.0189, "step": 3392 }, { "epoch": 32.0, "eval_accuracy": 0.7921468329354021, "eval_loss": 0.9229845404624939, "eval_runtime": 24.3268, "eval_samples_per_second": 72.759, "eval_steps_per_second": 0.247, "step": 3392 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 1.0079, "step": 3498 }, { "epoch": 33.0, "eval_accuracy": 0.7888655988210297, "eval_loss": 0.9461591243743896, "eval_runtime": 24.4302, "eval_samples_per_second": 72.451, "eval_steps_per_second": 0.246, "step": 3498 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 1.0018, "step": 3604 }, { "epoch": 34.0, "eval_accuracy": 0.7890031028955456, "eval_loss": 0.9435957074165344, "eval_runtime": 24.4529, "eval_samples_per_second": 72.384, "eval_steps_per_second": 0.245, "step": 3604 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 1.0063, "step": 3710 }, { "epoch": 35.0, "eval_accuracy": 0.7977425395315658, "eval_loss": 0.8957004547119141, "eval_runtime": 24.4025, "eval_samples_per_second": 72.534, "eval_steps_per_second": 0.246, "step": 3710 }, { "epoch": 36.0, "learning_rate": 2e-05, "loss": 0.9982, "step": 3816 }, { "epoch": 36.0, "eval_accuracy": 0.7993480649855951, "eval_loss": 0.8885225057601929, "eval_runtime": 24.4063, "eval_samples_per_second": 72.522, "eval_steps_per_second": 0.246, "step": 3816 }, { "epoch": 37.0, "learning_rate": 2e-05, "loss": 0.9911, "step": 3922 }, { "epoch": 37.0, "eval_accuracy": 0.8001705662056435, "eval_loss": 0.8866317868232727, "eval_runtime": 24.3478, "eval_samples_per_second": 72.697, "eval_steps_per_second": 0.246, "step": 3922 } ], "max_steps": 4240, "num_train_epochs": 40, "total_flos": 472663961108480.0, "trial_name": null, "trial_params": null }