{ "best_metric": 3.57045841217041, "best_model_checkpoint": "output/50-cent/checkpoint-258", "epoch": 1.0, "global_step": 258, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00013707289568217504, "loss": 4.0779, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.000136692053734753, "loss": 4.1148, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00013605888543050137, "loss": 4.0688, "step": 15 }, { "epoch": 0.08, "learning_rate": 0.000135175737079198, "loss": 3.9408, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.0001340458813329925, "loss": 3.722, "step": 25 }, { "epoch": 0.12, "learning_rate": 0.0001326735050590528, "loss": 3.9874, "step": 30 }, { "epoch": 0.14, "learning_rate": 0.0001310636938244372, "loss": 3.8949, "step": 35 }, { "epoch": 0.16, "learning_rate": 0.00012922241305068514, "loss": 3.9061, "step": 40 }, { "epoch": 0.17, "learning_rate": 0.00012715648590796197, "loss": 3.8976, "step": 45 }, { "epoch": 0.19, "learning_rate": 0.00012487356803067445, "loss": 3.672, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00012238211914825242, "loss": 3.6326, "step": 55 }, { "epoch": 0.23, "learning_rate": 0.00011969137173622343, "loss": 3.7851, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.00011681129680374918, "loss": 3.7425, "step": 65 }, { "epoch": 0.27, "learning_rate": 0.00011375256694440331, "loss": 3.7256, "step": 70 }, { "epoch": 0.29, "learning_rate": 0.00011052651678711246, "loss": 3.6549, "step": 75 }, { "epoch": 0.31, "learning_rate": 0.00010714510099381572, "loss": 3.7504, "step": 80 }, { "epoch": 0.33, "learning_rate": 0.00010362084995948948, "loss": 3.7761, "step": 85 }, { "epoch": 0.35, "learning_rate": 9.99668233786982e-05, "loss": 3.6905, "step": 90 }, { "epoch": 0.37, "learning_rate": 9.619656185073828e-05, "loss": 3.6569, "step": 95 }, { "epoch": 0.39, "learning_rate": 9.232403670270978e-05, "loss": 3.6521, "step": 100 }, { "epoch": 0.41, "learning_rate": 8.83635982164553e-05, "loss": 3.7574, "step": 105 }, { "epoch": 0.43, "learning_rate": 8.432992245121922e-05, "loss": 3.6063, "step": 110 }, { "epoch": 0.45, "learning_rate": 8.023795685908513e-05, "loss": 3.6537, "step": 115 }, { "epoch": 0.47, "learning_rate": 7.61028648947222e-05, "loss": 3.6364, "step": 120 }, { "epoch": 0.48, "learning_rate": 7.193996982469813e-05, "loss": 3.7473, "step": 125 }, { "epoch": 0.5, "learning_rate": 6.776469794458297e-05, "loss": 3.6127, "step": 130 }, { "epoch": 0.52, "learning_rate": 6.359252141426174e-05, "loss": 3.6751, "step": 135 }, { "epoch": 0.54, "learning_rate": 5.943890092328931e-05, "loss": 3.6126, "step": 140 }, { "epoch": 0.56, "learning_rate": 5.531922839875045e-05, "loss": 3.6423, "step": 145 }, { "epoch": 0.58, "learning_rate": 5.1248769967930697e-05, "loss": 3.4521, "step": 150 }, { "epoch": 0.6, "learning_rate": 4.724260938715906e-05, "loss": 3.6127, "step": 155 }, { "epoch": 0.62, "learning_rate": 4.331559214645638e-05, "loss": 3.7675, "step": 160 }, { "epoch": 0.64, "learning_rate": 3.9482270457119095e-05, "loss": 3.6344, "step": 165 }, { "epoch": 0.66, "learning_rate": 3.575684932609548e-05, "loss": 3.5602, "step": 170 }, { "epoch": 0.68, "learning_rate": 3.215313391698519e-05, "loss": 3.6583, "step": 175 }, { "epoch": 0.7, "learning_rate": 2.8684478392724447e-05, "loss": 3.6643, "step": 180 }, { "epoch": 0.72, "learning_rate": 2.5363736429528382e-05, "loss": 3.4823, "step": 185 }, { "epoch": 0.74, "learning_rate": 2.2203213585470068e-05, "loss": 3.4787, "step": 190 }, { "epoch": 0.76, "learning_rate": 1.9214621700201495e-05, "loss": 3.4209, "step": 195 }, { "epoch": 0.78, "learning_rate": 1.6409035494796526e-05, "loss": 3.3871, "step": 200 }, { "epoch": 0.79, "learning_rate": 1.3796851532541997e-05, "loss": 3.3947, "step": 205 }, { "epoch": 0.81, "learning_rate": 1.1387749692754382e-05, "loss": 3.4866, "step": 210 }, { "epoch": 0.83, "learning_rate": 9.190657300387505e-06, "loss": 3.5918, "step": 215 }, { "epoch": 0.85, "learning_rate": 7.213716044354821e-06, "loss": 3.4983, "step": 220 }, { "epoch": 0.87, "learning_rate": 5.464251807155671e-06, "loss": 3.7741, "step": 225 }, { "epoch": 0.89, "learning_rate": 3.948747517607189e-06, "loss": 3.7211, "step": 230 }, { "epoch": 0.91, "learning_rate": 2.6728191272803667e-06, "loss": 3.429, "step": 235 }, { "epoch": 0.93, "learning_rate": 1.6411947996634035e-06, "loss": 3.5074, "step": 240 }, { "epoch": 0.95, "learning_rate": 8.57697389170679e-07, "loss": 3.7424, "step": 245 }, { "epoch": 0.97, "learning_rate": 3.252302749240256e-07, "loss": 3.5668, "step": 250 }, { "epoch": 0.99, "learning_rate": 4.576660180157781e-08, "loss": 3.5697, "step": 255 }, { "epoch": 1.0, "eval_loss": 3.57045841217041, "eval_runtime": 16.4667, "eval_samples_per_second": 22.47, "eval_steps_per_second": 2.854, "step": 258 } ], "max_steps": 258, "num_train_epochs": 1, "total_flos": 268869500928000.0, "trial_name": null, "trial_params": null }