{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 47275, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.9471179270227394e-05, "loss": 3.4788, "step": 500 }, { "epoch": 0.11, "learning_rate": 4.8942358540454786e-05, "loss": 2.8908, "step": 1000 }, { "epoch": 0.16, "learning_rate": 4.841353781068218e-05, "loss": 2.6428, "step": 1500 }, { "epoch": 0.21, "learning_rate": 4.7884717080909575e-05, "loss": 2.4772, "step": 2000 }, { "epoch": 0.26, "learning_rate": 4.735589635113697e-05, "loss": 2.3782, "step": 2500 }, { "epoch": 0.32, "learning_rate": 4.682707562136436e-05, "loss": 2.2953, "step": 3000 }, { "epoch": 0.37, "learning_rate": 4.629825489159175e-05, "loss": 2.2185, "step": 3500 }, { "epoch": 0.42, "learning_rate": 4.576943416181915e-05, "loss": 2.1603, "step": 4000 }, { "epoch": 0.48, "learning_rate": 4.524167107350608e-05, "loss": 2.126, "step": 4500 }, { "epoch": 0.53, "learning_rate": 4.471285034373348e-05, "loss": 2.083, "step": 5000 }, { "epoch": 0.58, "learning_rate": 4.418402961396087e-05, "loss": 2.0452, "step": 5500 }, { "epoch": 0.63, "learning_rate": 4.365520888418826e-05, "loss": 2.023, "step": 6000 }, { "epoch": 0.69, "learning_rate": 4.312638815441565e-05, "loss": 1.9837, "step": 6500 }, { "epoch": 0.74, "learning_rate": 4.259756742464305e-05, "loss": 1.9416, "step": 7000 }, { "epoch": 0.79, "learning_rate": 4.206874669487044e-05, "loss": 1.9265, "step": 7500 }, { "epoch": 0.85, "learning_rate": 4.1539925965097834e-05, "loss": 1.9048, "step": 8000 }, { "epoch": 0.9, "learning_rate": 4.101216287678477e-05, "loss": 1.8912, "step": 8500 }, { "epoch": 0.95, "learning_rate": 4.0483342147012164e-05, "loss": 1.8753, "step": 9000 }, { "epoch": 1.0, "learning_rate": 3.995452141723956e-05, "loss": 1.8297, "step": 9500 }, { "epoch": 1.06, "learning_rate": 3.942570068746695e-05, "loss": 1.7432, "step": 10000 }, { "epoch": 1.11, "learning_rate": 3.889793759915389e-05, "loss": 1.7278, "step": 10500 }, { "epoch": 1.16, "learning_rate": 3.8369116869381283e-05, "loss": 1.716, "step": 11000 }, { "epoch": 1.22, "learning_rate": 3.7841353781068215e-05, "loss": 1.7175, "step": 11500 }, { "epoch": 1.27, "learning_rate": 3.731359069275516e-05, "loss": 1.7146, "step": 12000 }, { "epoch": 1.32, "learning_rate": 3.678476996298255e-05, "loss": 1.693, "step": 12500 }, { "epoch": 1.37, "learning_rate": 3.625594923320994e-05, "loss": 1.686, "step": 13000 }, { "epoch": 1.43, "learning_rate": 3.5727128503437334e-05, "loss": 1.6742, "step": 13500 }, { "epoch": 1.48, "learning_rate": 3.5198307773664726e-05, "loss": 1.6799, "step": 14000 }, { "epoch": 1.53, "learning_rate": 3.4669487043892124e-05, "loss": 1.6635, "step": 14500 }, { "epoch": 1.59, "learning_rate": 3.4140666314119516e-05, "loss": 1.6466, "step": 15000 }, { "epoch": 1.64, "learning_rate": 3.361184558434691e-05, "loss": 1.6462, "step": 15500 }, { "epoch": 1.69, "learning_rate": 3.30830248545743e-05, "loss": 1.6325, "step": 16000 }, { "epoch": 1.75, "learning_rate": 3.25542041248017e-05, "loss": 1.6235, "step": 16500 }, { "epoch": 1.8, "learning_rate": 3.202538339502908e-05, "loss": 1.6238, "step": 17000 }, { "epoch": 1.85, "learning_rate": 3.149656266525648e-05, "loss": 1.6193, "step": 17500 }, { "epoch": 1.9, "learning_rate": 3.096774193548387e-05, "loss": 1.6095, "step": 18000 }, { "epoch": 1.96, "learning_rate": 3.0438921205711264e-05, "loss": 1.6055, "step": 18500 }, { "epoch": 2.01, "learning_rate": 2.991010047593866e-05, "loss": 1.5826, "step": 19000 }, { "epoch": 2.06, "learning_rate": 2.938127974616605e-05, "loss": 1.4867, "step": 19500 }, { "epoch": 2.12, "learning_rate": 2.8852459016393445e-05, "loss": 1.4935, "step": 20000 }, { "epoch": 2.17, "learning_rate": 2.832469592808038e-05, "loss": 1.4922, "step": 20500 }, { "epoch": 2.22, "learning_rate": 2.7795875198307775e-05, "loss": 1.4821, "step": 21000 }, { "epoch": 2.27, "learning_rate": 2.7268112109994716e-05, "loss": 1.4862, "step": 21500 }, { "epoch": 2.33, "learning_rate": 2.6739291380222104e-05, "loss": 1.4818, "step": 22000 }, { "epoch": 2.38, "learning_rate": 2.6211528291909042e-05, "loss": 1.4798, "step": 22500 }, { "epoch": 2.43, "learning_rate": 2.5682707562136437e-05, "loss": 1.4719, "step": 23000 }, { "epoch": 2.49, "learning_rate": 2.5153886832363832e-05, "loss": 1.4622, "step": 23500 }, { "epoch": 2.54, "learning_rate": 2.4625066102591224e-05, "loss": 1.4654, "step": 24000 }, { "epoch": 2.59, "learning_rate": 2.4096245372818615e-05, "loss": 1.4618, "step": 24500 }, { "epoch": 2.64, "learning_rate": 2.3567424643046007e-05, "loss": 1.4418, "step": 25000 }, { "epoch": 2.7, "learning_rate": 2.3038603913273402e-05, "loss": 1.4613, "step": 25500 }, { "epoch": 2.75, "learning_rate": 2.2509783183500794e-05, "loss": 1.4543, "step": 26000 }, { "epoch": 2.8, "learning_rate": 2.198096245372819e-05, "loss": 1.4444, "step": 26500 }, { "epoch": 2.86, "learning_rate": 2.145214172395558e-05, "loss": 1.4456, "step": 27000 }, { "epoch": 2.91, "learning_rate": 2.0923320994182975e-05, "loss": 1.4435, "step": 27500 }, { "epoch": 2.96, "learning_rate": 2.0394500264410367e-05, "loss": 1.4228, "step": 28000 }, { "epoch": 3.01, "learning_rate": 1.986567953463776e-05, "loss": 1.4057, "step": 28500 }, { "epoch": 3.07, "learning_rate": 1.9337916446324696e-05, "loss": 1.3607, "step": 29000 }, { "epoch": 3.12, "learning_rate": 1.8810153358011634e-05, "loss": 1.3515, "step": 29500 }, { "epoch": 3.17, "learning_rate": 1.828133262823903e-05, "loss": 1.3416, "step": 30000 }, { "epoch": 3.23, "learning_rate": 1.7753569539925964e-05, "loss": 1.3454, "step": 30500 }, { "epoch": 3.28, "learning_rate": 1.722474881015336e-05, "loss": 1.3449, "step": 31000 }, { "epoch": 3.33, "learning_rate": 1.669592808038075e-05, "loss": 1.3358, "step": 31500 }, { "epoch": 3.38, "learning_rate": 1.6167107350608142e-05, "loss": 1.3363, "step": 32000 }, { "epoch": 3.44, "learning_rate": 1.5638286620835537e-05, "loss": 1.3372, "step": 32500 }, { "epoch": 3.49, "learning_rate": 1.5109465891062929e-05, "loss": 1.3279, "step": 33000 }, { "epoch": 3.54, "learning_rate": 1.4580645161290324e-05, "loss": 1.3446, "step": 33500 }, { "epoch": 3.6, "learning_rate": 1.4051824431517715e-05, "loss": 1.3296, "step": 34000 }, { "epoch": 3.65, "learning_rate": 1.352300370174511e-05, "loss": 1.3208, "step": 34500 }, { "epoch": 3.7, "learning_rate": 1.2994182971972502e-05, "loss": 1.3309, "step": 35000 }, { "epoch": 3.75, "learning_rate": 1.2465362242199895e-05, "loss": 1.324, "step": 35500 }, { "epoch": 3.81, "learning_rate": 1.1936541512427288e-05, "loss": 1.321, "step": 36000 }, { "epoch": 3.86, "learning_rate": 1.1407720782654681e-05, "loss": 1.3155, "step": 36500 }, { "epoch": 3.91, "learning_rate": 1.0878900052882075e-05, "loss": 1.3266, "step": 37000 }, { "epoch": 3.97, "learning_rate": 1.0352194606028556e-05, "loss": 1.3113, "step": 37500 }, { "epoch": 4.02, "learning_rate": 9.823373876255949e-06, "loss": 1.2879, "step": 38000 }, { "epoch": 4.07, "learning_rate": 9.294553146483342e-06, "loss": 1.2644, "step": 38500 }, { "epoch": 4.12, "learning_rate": 8.765732416710736e-06, "loss": 1.259, "step": 39000 }, { "epoch": 4.18, "learning_rate": 8.236911686938129e-06, "loss": 1.2709, "step": 39500 }, { "epoch": 4.23, "learning_rate": 7.708090957165522e-06, "loss": 1.2528, "step": 40000 }, { "epoch": 4.28, "learning_rate": 7.18032786885246e-06, "loss": 1.2498, "step": 40500 }, { "epoch": 4.34, "learning_rate": 6.651507139079852e-06, "loss": 1.2522, "step": 41000 }, { "epoch": 4.39, "learning_rate": 6.122686409307245e-06, "loss": 1.2504, "step": 41500 }, { "epoch": 4.44, "learning_rate": 5.5938656795346375e-06, "loss": 1.2602, "step": 42000 }, { "epoch": 4.49, "learning_rate": 5.065044949762031e-06, "loss": 1.2431, "step": 42500 }, { "epoch": 4.55, "learning_rate": 4.536224219989424e-06, "loss": 1.2391, "step": 43000 }, { "epoch": 4.6, "learning_rate": 4.007403490216817e-06, "loss": 1.2426, "step": 43500 }, { "epoch": 4.65, "learning_rate": 3.4785827604442092e-06, "loss": 1.2421, "step": 44000 }, { "epoch": 4.71, "learning_rate": 2.9497620306716025e-06, "loss": 1.2411, "step": 44500 }, { "epoch": 4.76, "learning_rate": 2.4219989423585406e-06, "loss": 1.2403, "step": 45000 }, { "epoch": 4.81, "learning_rate": 1.8931782125859334e-06, "loss": 1.2524, "step": 45500 }, { "epoch": 4.87, "learning_rate": 1.3643574828133265e-06, "loss": 1.2433, "step": 46000 }, { "epoch": 4.92, "learning_rate": 8.355367530407193e-07, "loss": 1.2377, "step": 46500 }, { "epoch": 4.97, "learning_rate": 3.0671602326811214e-07, "loss": 1.2334, "step": 47000 }, { "epoch": 5.0, "step": 47275, "total_flos": 5.929143675153285e+17, "train_loss": 1.586575432610474, "train_runtime": 13457.6286, "train_samples_per_second": 224.823, "train_steps_per_second": 3.513 } ], "logging_steps": 500, "max_steps": 47275, "num_train_epochs": 5, "save_steps": 10000000, "total_flos": 5.929143675153285e+17, "trial_name": null, "trial_params": null }