{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "eval_steps": 500, "global_step": 7400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.22874597126094132, "eval_loss": 5.837828636169434, "eval_runtime": 19.7897, "eval_samples_per_second": 59.071, "eval_steps_per_second": 1.87, "step": 148 }, { "epoch": 2.0, "eval_accuracy": 0.24914672818864436, "eval_loss": 5.573156356811523, "eval_runtime": 19.7414, "eval_samples_per_second": 59.216, "eval_steps_per_second": 1.874, "step": 296 }, { "epoch": 3.0, "eval_accuracy": 0.26800943567410634, "eval_loss": 5.379988670349121, "eval_runtime": 19.7422, "eval_samples_per_second": 59.213, "eval_steps_per_second": 1.874, "step": 444 }, { "epoch": 3.38, "learning_rate": 9.324324324324325e-06, "loss": 5.761, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.28418487699924827, "eval_loss": 5.225850582122803, "eval_runtime": 19.7377, "eval_samples_per_second": 59.227, "eval_steps_per_second": 1.875, "step": 592 }, { "epoch": 5.0, "eval_accuracy": 0.2963078172659011, "eval_loss": 5.1006011962890625, "eval_runtime": 19.742, "eval_samples_per_second": 59.214, "eval_steps_per_second": 1.874, "step": 740 }, { "epoch": 6.0, "eval_accuracy": 0.30673717500064807, "eval_loss": 4.999685764312744, "eval_runtime": 19.749, "eval_samples_per_second": 59.193, "eval_steps_per_second": 1.874, "step": 888 }, { "epoch": 6.76, "learning_rate": 8.64864864864865e-06, "loss": 5.1725, "step": 1000 }, { "epoch": 7.0, "eval_accuracy": 0.3152828542050099, "eval_loss": 4.910343170166016, "eval_runtime": 19.7422, "eval_samples_per_second": 59.213, "eval_steps_per_second": 1.874, "step": 1036 }, { "epoch": 8.0, "eval_accuracy": 0.322955819961808, "eval_loss": 4.828683853149414, "eval_runtime": 19.7403, "eval_samples_per_second": 59.219, "eval_steps_per_second": 1.874, "step": 1184 }, { "epoch": 9.0, "eval_accuracy": 0.3300757791775756, "eval_loss": 4.757791996002197, "eval_runtime": 19.7469, "eval_samples_per_second": 59.199, "eval_steps_per_second": 1.874, "step": 1332 }, { "epoch": 10.0, "eval_accuracy": 0.3375845711175052, "eval_loss": 4.694174766540527, "eval_runtime": 19.7442, "eval_samples_per_second": 59.207, "eval_steps_per_second": 1.874, "step": 1480 }, { "epoch": 10.14, "learning_rate": 7.972972972972974e-06, "loss": 4.8482, "step": 1500 }, { "epoch": 11.0, "eval_accuracy": 0.3439355056121523, "eval_loss": 4.63644552230835, "eval_runtime": 19.7467, "eval_samples_per_second": 59.2, "eval_steps_per_second": 1.874, "step": 1628 }, { "epoch": 12.0, "eval_accuracy": 0.34968158920254727, "eval_loss": 4.5813164710998535, "eval_runtime": 19.7421, "eval_samples_per_second": 59.214, "eval_steps_per_second": 1.874, "step": 1776 }, { "epoch": 13.0, "eval_accuracy": 0.3554363135201459, "eval_loss": 4.532833099365234, "eval_runtime": 19.7462, "eval_samples_per_second": 59.201, "eval_steps_per_second": 1.874, "step": 1924 }, { "epoch": 13.51, "learning_rate": 7.297297297297298e-06, "loss": 4.609, "step": 2000 }, { "epoch": 14.0, "eval_accuracy": 0.3610182232936724, "eval_loss": 4.4896931648254395, "eval_runtime": 19.7471, "eval_samples_per_second": 59.199, "eval_steps_per_second": 1.874, "step": 2072 }, { "epoch": 15.0, "eval_accuracy": 0.3656582938020064, "eval_loss": 4.4454345703125, "eval_runtime": 19.7467, "eval_samples_per_second": 59.2, "eval_steps_per_second": 1.874, "step": 2220 }, { "epoch": 16.0, "eval_accuracy": 0.37016011267508275, "eval_loss": 4.413242340087891, "eval_runtime": 19.7438, "eval_samples_per_second": 59.208, "eval_steps_per_second": 1.874, "step": 2368 }, { "epoch": 16.89, "learning_rate": 6.621621621621622e-06, "loss": 4.4241, "step": 2500 }, { "epoch": 17.0, "eval_accuracy": 0.3737978588277989, "eval_loss": 4.374161243438721, "eval_runtime": 19.7484, "eval_samples_per_second": 59.195, "eval_steps_per_second": 1.874, "step": 2516 }, { "epoch": 18.0, "eval_accuracy": 0.3782132704288393, "eval_loss": 4.343791961669922, "eval_runtime": 19.7509, "eval_samples_per_second": 59.187, "eval_steps_per_second": 1.873, "step": 2664 }, { "epoch": 19.0, "eval_accuracy": 0.38171276494629786, "eval_loss": 4.316366672515869, "eval_runtime": 19.7486, "eval_samples_per_second": 59.194, "eval_steps_per_second": 1.874, "step": 2812 }, { "epoch": 20.0, "eval_accuracy": 0.3847715823763728, "eval_loss": 4.2879157066345215, "eval_runtime": 19.7502, "eval_samples_per_second": 59.189, "eval_steps_per_second": 1.873, "step": 2960 }, { "epoch": 20.27, "learning_rate": 5.945945945945947e-06, "loss": 4.283, "step": 3000 }, { "epoch": 21.0, "eval_accuracy": 0.3878131183520405, "eval_loss": 4.260156631469727, "eval_runtime": 19.7607, "eval_samples_per_second": 59.158, "eval_steps_per_second": 1.872, "step": 3108 }, { "epoch": 22.0, "eval_accuracy": 0.39024116269625253, "eval_loss": 4.23726224899292, "eval_runtime": 19.749, "eval_samples_per_second": 59.193, "eval_steps_per_second": 1.874, "step": 3256 }, { "epoch": 23.0, "eval_accuracy": 0.3931530877638662, "eval_loss": 4.216041088104248, "eval_runtime": 19.7399, "eval_samples_per_second": 59.22, "eval_steps_per_second": 1.874, "step": 3404 }, { "epoch": 23.65, "learning_rate": 5.2702702702702705e-06, "loss": 4.1606, "step": 3500 }, { "epoch": 24.0, "eval_accuracy": 0.39539103610959897, "eval_loss": 4.196435928344727, "eval_runtime": 19.7434, "eval_samples_per_second": 59.21, "eval_steps_per_second": 1.874, "step": 3552 }, { "epoch": 25.0, "eval_accuracy": 0.39759442154651736, "eval_loss": 4.181632041931152, "eval_runtime": 19.7448, "eval_samples_per_second": 59.205, "eval_steps_per_second": 1.874, "step": 3700 }, { "epoch": 26.0, "eval_accuracy": 0.39951266298571686, "eval_loss": 4.160492897033691, "eval_runtime": 19.7431, "eval_samples_per_second": 59.21, "eval_steps_per_second": 1.874, "step": 3848 }, { "epoch": 27.0, "eval_accuracy": 0.4016382818778028, "eval_loss": 4.144294738769531, "eval_runtime": 19.7428, "eval_samples_per_second": 59.212, "eval_steps_per_second": 1.874, "step": 3996 }, { "epoch": 27.03, "learning_rate": 4.594594594594596e-06, "loss": 4.0685, "step": 4000 }, { "epoch": 28.0, "eval_accuracy": 0.4034355531361519, "eval_loss": 4.129027366638184, "eval_runtime": 19.7497, "eval_samples_per_second": 59.191, "eval_steps_per_second": 1.873, "step": 4144 }, { "epoch": 29.0, "eval_accuracy": 0.40515505784966865, "eval_loss": 4.114578723907471, "eval_runtime": 19.7461, "eval_samples_per_second": 59.202, "eval_steps_per_second": 1.874, "step": 4292 }, { "epoch": 30.0, "eval_accuracy": 0.40673631092792767, "eval_loss": 4.103656768798828, "eval_runtime": 19.7473, "eval_samples_per_second": 59.198, "eval_steps_per_second": 1.874, "step": 4440 }, { "epoch": 30.41, "learning_rate": 3.918918918918919e-06, "loss": 3.9859, "step": 4500 }, { "epoch": 31.0, "eval_accuracy": 0.40824843818855794, "eval_loss": 4.087373733520508, "eval_runtime": 19.744, "eval_samples_per_second": 59.208, "eval_steps_per_second": 1.874, "step": 4588 }, { "epoch": 32.0, "eval_accuracy": 0.4098124098124098, "eval_loss": 4.078439235687256, "eval_runtime": 19.7428, "eval_samples_per_second": 59.211, "eval_steps_per_second": 1.874, "step": 4736 }, { "epoch": 33.0, "eval_accuracy": 0.4114714294355013, "eval_loss": 4.066919803619385, "eval_runtime": 19.7485, "eval_samples_per_second": 59.194, "eval_steps_per_second": 1.874, "step": 4884 }, { "epoch": 33.78, "learning_rate": 3.2432432432432437e-06, "loss": 3.9275, "step": 5000 }, { "epoch": 34.0, "eval_accuracy": 0.41251695742713707, "eval_loss": 4.058130264282227, "eval_runtime": 19.7418, "eval_samples_per_second": 59.215, "eval_steps_per_second": 1.874, "step": 5032 }, { "epoch": 35.0, "eval_accuracy": 0.41355384469156925, "eval_loss": 4.047876834869385, "eval_runtime": 19.7402, "eval_samples_per_second": 59.219, "eval_steps_per_second": 1.874, "step": 5180 }, { "epoch": 36.0, "eval_accuracy": 0.41476354650007347, "eval_loss": 4.038449287414551, "eval_runtime": 19.7398, "eval_samples_per_second": 59.22, "eval_steps_per_second": 1.874, "step": 5328 }, { "epoch": 37.0, "eval_accuracy": 0.4158868410365416, "eval_loss": 4.033016204833984, "eval_runtime": 19.7451, "eval_samples_per_second": 59.204, "eval_steps_per_second": 1.874, "step": 5476 }, { "epoch": 37.16, "learning_rate": 2.5675675675675675e-06, "loss": 3.8799, "step": 5500 }, { "epoch": 38.0, "eval_accuracy": 0.41657809921282973, "eval_loss": 4.026158332824707, "eval_runtime": 19.7412, "eval_samples_per_second": 59.216, "eval_steps_per_second": 1.874, "step": 5624 }, { "epoch": 39.0, "eval_accuracy": 0.4173989682971719, "eval_loss": 4.021160125732422, "eval_runtime": 19.7395, "eval_samples_per_second": 59.221, "eval_steps_per_second": 1.874, "step": 5772 }, { "epoch": 40.0, "eval_accuracy": 0.418003819201424, "eval_loss": 4.013641834259033, "eval_runtime": 19.7429, "eval_samples_per_second": 59.211, "eval_steps_per_second": 1.874, "step": 5920 }, { "epoch": 40.54, "learning_rate": 1.8918918918918922e-06, "loss": 3.8348, "step": 6000 }, { "epoch": 41.0, "eval_accuracy": 0.4185568257424545, "eval_loss": 4.011115550994873, "eval_runtime": 19.7447, "eval_samples_per_second": 59.206, "eval_steps_per_second": 1.874, "step": 6068 }, { "epoch": 42.0, "eval_accuracy": 0.4192567246459462, "eval_loss": 4.004785060882568, "eval_runtime": 19.7414, "eval_samples_per_second": 59.216, "eval_steps_per_second": 1.874, "step": 6216 }, { "epoch": 43.0, "eval_accuracy": 0.41952458718925784, "eval_loss": 4.000365257263184, "eval_runtime": 19.7443, "eval_samples_per_second": 59.207, "eval_steps_per_second": 1.874, "step": 6364 }, { "epoch": 43.92, "learning_rate": 1.2162162162162164e-06, "loss": 3.8151, "step": 6500 }, { "epoch": 44.0, "eval_accuracy": 0.4198615755501983, "eval_loss": 3.997826337814331, "eval_runtime": 19.752, "eval_samples_per_second": 59.184, "eval_steps_per_second": 1.873, "step": 6512 }, { "epoch": 45.0, "eval_accuracy": 0.4203195340919892, "eval_loss": 3.9950661659240723, "eval_runtime": 19.7402, "eval_samples_per_second": 59.219, "eval_steps_per_second": 1.874, "step": 6660 }, { "epoch": 46.0, "eval_accuracy": 0.42070836681615126, "eval_loss": 3.99338960647583, "eval_runtime": 19.7409, "eval_samples_per_second": 59.217, "eval_steps_per_second": 1.874, "step": 6808 }, { "epoch": 47.0, "eval_accuracy": 0.4209071035418341, "eval_loss": 3.990373134613037, "eval_runtime": 19.7427, "eval_samples_per_second": 59.212, "eval_steps_per_second": 1.874, "step": 6956 }, { "epoch": 47.3, "learning_rate": 5.405405405405406e-07, "loss": 3.7948, "step": 7000 }, { "epoch": 48.0, "eval_accuracy": 0.4210626366314989, "eval_loss": 3.9910764694213867, "eval_runtime": 19.7469, "eval_samples_per_second": 59.199, "eval_steps_per_second": 1.874, "step": 7104 }, { "epoch": 49.0, "eval_accuracy": 0.42127001408438536, "eval_loss": 3.9890379905700684, "eval_runtime": 19.7459, "eval_samples_per_second": 59.202, "eval_steps_per_second": 1.874, "step": 7252 }, { "epoch": 50.0, "eval_accuracy": 0.42128729553879257, "eval_loss": 3.988962173461914, "eval_runtime": 19.7474, "eval_samples_per_second": 59.198, "eval_steps_per_second": 1.874, "step": 7400 }, { "epoch": 50.0, "step": 7400, "total_flos": 1.00265577216e+17, "train_loss": 4.29683648186761, "train_runtime": 9035.6616, "train_samples_per_second": 26.108, "train_steps_per_second": 0.819 } ], "logging_steps": 500, "max_steps": 7400, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.00265577216e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }