{ "best_metric": null, "best_model_checkpoint": null, "epoch": 31.998864926220204, "global_step": 7040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.8224893386183709, "eval_loss": 1.1622651815414429, "eval_runtime": 5.9816, "eval_samples_per_second": 15.213, "eval_steps_per_second": 7.69, "step": 220 }, { "epoch": 2.0, "eval_accuracy": 0.8538665635439829, "eval_loss": 0.9566460251808167, "eval_runtime": 5.9768, "eval_samples_per_second": 15.226, "eval_steps_per_second": 7.696, "step": 440 }, { "epoch": 2.27, "learning_rate": 4.6448863636363636e-05, "loss": 1.1942, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.8709140322043548, "eval_loss": 0.8456286191940308, "eval_runtime": 5.9871, "eval_samples_per_second": 15.199, "eval_steps_per_second": 7.683, "step": 660 }, { "epoch": 4.0, "eval_accuracy": 0.8801198801198801, "eval_loss": 0.7718582153320312, "eval_runtime": 5.6728, "eval_samples_per_second": 16.041, "eval_steps_per_second": 8.109, "step": 880 }, { "epoch": 4.54, "learning_rate": 4.289772727272727e-05, "loss": 0.7805, "step": 1000 }, { "epoch": 5.0, "eval_accuracy": 0.8871880807364678, "eval_loss": 0.7224407196044922, "eval_runtime": 5.988, "eval_samples_per_second": 15.197, "eval_steps_per_second": 7.682, "step": 1100 }, { "epoch": 6.0, "eval_accuracy": 0.892816860558796, "eval_loss": 0.6894707679748535, "eval_runtime": 5.9958, "eval_samples_per_second": 15.177, "eval_steps_per_second": 7.672, "step": 1320 }, { "epoch": 6.82, "learning_rate": 3.934659090909091e-05, "loss": 0.6257, "step": 1500 }, { "epoch": 7.0, "eval_accuracy": 0.8972103165651553, "eval_loss": 0.6574080586433411, "eval_runtime": 5.9909, "eval_samples_per_second": 15.19, "eval_steps_per_second": 7.678, "step": 1540 }, { "epoch": 8.0, "eval_accuracy": 0.9014426433781273, "eval_loss": 0.6289474368095398, "eval_runtime": 5.6911, "eval_samples_per_second": 15.99, "eval_steps_per_second": 8.083, "step": 1760 }, { "epoch": 9.0, "eval_accuracy": 0.9045470658373884, "eval_loss": 0.6054205298423767, "eval_runtime": 5.6786, "eval_samples_per_second": 16.025, "eval_steps_per_second": 8.101, "step": 1980 }, { "epoch": 9.09, "learning_rate": 3.579545454545455e-05, "loss": 0.5385, "step": 2000 }, { "epoch": 10.0, "eval_accuracy": 0.9076622302428754, "eval_loss": 0.5881273746490479, "eval_runtime": 6.0014, "eval_samples_per_second": 15.163, "eval_steps_per_second": 7.665, "step": 2200 }, { "epoch": 11.0, "eval_accuracy": 0.9101865876059424, "eval_loss": 0.5709272623062134, "eval_runtime": 5.9886, "eval_samples_per_second": 15.195, "eval_steps_per_second": 7.681, "step": 2420 }, { "epoch": 11.36, "learning_rate": 3.2244318181818185e-05, "loss": 0.4778, "step": 2500 }, { "epoch": 12.0, "eval_accuracy": 0.9120986540341379, "eval_loss": 0.5591339468955994, "eval_runtime": 5.9874, "eval_samples_per_second": 15.199, "eval_steps_per_second": 7.683, "step": 2640 }, { "epoch": 13.0, "eval_accuracy": 0.9142900110642046, "eval_loss": 0.5496613383293152, "eval_runtime": 5.9826, "eval_samples_per_second": 15.211, "eval_steps_per_second": 7.689, "step": 2860 }, { "epoch": 13.64, "learning_rate": 2.870028409090909e-05, "loss": 0.427, "step": 3000 }, { "epoch": 14.0, "eval_accuracy": 0.9161053999763678, "eval_loss": 0.5385328531265259, "eval_runtime": 5.9885, "eval_samples_per_second": 15.196, "eval_steps_per_second": 7.681, "step": 3080 }, { "epoch": 15.0, "eval_accuracy": 0.9179637566734341, "eval_loss": 0.5258467793464661, "eval_runtime": 5.9812, "eval_samples_per_second": 15.214, "eval_steps_per_second": 7.691, "step": 3300 }, { "epoch": 15.91, "learning_rate": 2.5149147727272725e-05, "loss": 0.394, "step": 3500 }, { "epoch": 16.0, "eval_accuracy": 0.9195428227686292, "eval_loss": 0.5170450806617737, "eval_runtime": 5.6775, "eval_samples_per_second": 16.028, "eval_steps_per_second": 8.102, "step": 3520 }, { "epoch": 17.0, "eval_accuracy": 0.9211648566487276, "eval_loss": 0.5156892538070679, "eval_runtime": 5.9945, "eval_samples_per_second": 15.18, "eval_steps_per_second": 7.674, "step": 3740 }, { "epoch": 18.0, "eval_accuracy": 0.9220671801316963, "eval_loss": 0.5037761926651001, "eval_runtime": 5.687, "eval_samples_per_second": 16.001, "eval_steps_per_second": 8.089, "step": 3960 }, { "epoch": 18.18, "learning_rate": 2.1598011363636363e-05, "loss": 0.363, "step": 4000 }, { "epoch": 19.0, "eval_accuracy": 0.9233884395174717, "eval_loss": 0.49766021966934204, "eval_runtime": 5.6849, "eval_samples_per_second": 16.007, "eval_steps_per_second": 8.092, "step": 4180 }, { "epoch": 20.0, "eval_accuracy": 0.9236462462268914, "eval_loss": 0.4975946843624115, "eval_runtime": 5.9923, "eval_samples_per_second": 15.186, "eval_steps_per_second": 7.677, "step": 4400 }, { "epoch": 20.45, "learning_rate": 1.8046875000000003e-05, "loss": 0.3392, "step": 4500 }, { "epoch": 21.0, "eval_accuracy": 0.9246882150107957, "eval_loss": 0.49241966009140015, "eval_runtime": 5.9923, "eval_samples_per_second": 15.186, "eval_steps_per_second": 7.677, "step": 4620 }, { "epoch": 22.0, "eval_accuracy": 0.9255046029239578, "eval_loss": 0.4887617826461792, "eval_runtime": 5.69, "eval_samples_per_second": 15.993, "eval_steps_per_second": 8.084, "step": 4840 }, { "epoch": 22.73, "learning_rate": 1.4495738636363637e-05, "loss": 0.33, "step": 5000 }, { "epoch": 23.0, "eval_accuracy": 0.9262028294286359, "eval_loss": 0.4889785051345825, "eval_runtime": 6.0024, "eval_samples_per_second": 15.161, "eval_steps_per_second": 7.664, "step": 5060 }, { "epoch": 24.0, "eval_accuracy": 0.9267936364710558, "eval_loss": 0.4856303334236145, "eval_runtime": 5.9867, "eval_samples_per_second": 15.2, "eval_steps_per_second": 7.684, "step": 5280 }, { "epoch": 25.0, "learning_rate": 1.0951704545454545e-05, "loss": 0.3058, "step": 5500 }, { "epoch": 25.0, "eval_accuracy": 0.9275348307606373, "eval_loss": 0.4802783131599426, "eval_runtime": 5.6869, "eval_samples_per_second": 16.002, "eval_steps_per_second": 8.089, "step": 5500 }, { "epoch": 26.0, "eval_accuracy": 0.9277389277389277, "eval_loss": 0.47845765948295593, "eval_runtime": 5.9773, "eval_samples_per_second": 15.224, "eval_steps_per_second": 7.696, "step": 5720 }, { "epoch": 27.0, "eval_accuracy": 0.9280611861257022, "eval_loss": 0.4813348948955536, "eval_runtime": 5.6869, "eval_samples_per_second": 16.002, "eval_steps_per_second": 8.089, "step": 5940 }, { "epoch": 27.27, "learning_rate": 7.407670454545455e-06, "loss": 0.2973, "step": 6000 }, { "epoch": 28.0, "eval_accuracy": 0.9281793475341863, "eval_loss": 0.4798637628555298, "eval_runtime": 5.6812, "eval_samples_per_second": 16.018, "eval_steps_per_second": 8.097, "step": 6160 }, { "epoch": 29.0, "eval_accuracy": 0.9285230898134124, "eval_loss": 0.47730037569999695, "eval_runtime": 5.9912, "eval_samples_per_second": 15.189, "eval_steps_per_second": 7.678, "step": 6380 }, { "epoch": 29.54, "learning_rate": 3.856534090909091e-06, "loss": 0.2931, "step": 6500 }, { "epoch": 30.0, "eval_accuracy": 0.9285982834369931, "eval_loss": 0.4778377413749695, "eval_runtime": 5.9957, "eval_samples_per_second": 15.177, "eval_steps_per_second": 7.672, "step": 6600 }, { "epoch": 31.0, "eval_accuracy": 0.9289527676624451, "eval_loss": 0.4756244122982025, "eval_runtime": 5.6812, "eval_samples_per_second": 16.018, "eval_steps_per_second": 8.097, "step": 6820 }, { "epoch": 31.82, "learning_rate": 3.053977272727273e-07, "loss": 0.2879, "step": 7000 }, { "epoch": 32.0, "eval_accuracy": 0.9289527676624451, "eval_loss": 0.47753414511680603, "eval_runtime": 5.6774, "eval_samples_per_second": 16.029, "eval_steps_per_second": 8.102, "step": 7040 }, { "epoch": 32.0, "step": 7040, "total_flos": 2.4539422830415053e+17, "train_loss": 0.47427067851478405, "train_runtime": 10360.152, "train_samples_per_second": 5.442, "train_steps_per_second": 0.68 } ], "max_steps": 7040, "num_train_epochs": 32, "total_flos": 2.4539422830415053e+17, "trial_name": null, "trial_params": null }