{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 14358, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 4.826577517760134e-05, "loss": 2.376, "step": 500 }, { "epoch": 0.21, "learning_rate": 4.652458559687979e-05, "loss": 1.9479, "step": 1000 }, { "epoch": 0.31, "learning_rate": 4.478339601615824e-05, "loss": 1.8326, "step": 1500 }, { "epoch": 0.42, "learning_rate": 4.304568881459813e-05, "loss": 1.7996, "step": 2000 }, { "epoch": 0.52, "learning_rate": 4.1304499233876586e-05, "loss": 1.7476, "step": 2500 }, { "epoch": 0.63, "learning_rate": 3.956330965315504e-05, "loss": 1.7537, "step": 3000 }, { "epoch": 0.73, "learning_rate": 3.782212007243349e-05, "loss": 1.7133, "step": 3500 }, { "epoch": 0.84, "learning_rate": 3.608093049171194e-05, "loss": 1.6888, "step": 4000 }, { "epoch": 0.94, "learning_rate": 3.434322329015183e-05, "loss": 1.6906, "step": 4500 }, { "epoch": 1.0, "eval_loss": 1.6714558601379395, "eval_runtime": 47.5103, "eval_samples_per_second": 9.577, "eval_steps_per_second": 1.2, "step": 4786 }, { "epoch": 1.04, "learning_rate": 3.2602033709430284e-05, "loss": 1.6771, "step": 5000 }, { "epoch": 1.15, "learning_rate": 3.086084412870874e-05, "loss": 1.6319, "step": 5500 }, { "epoch": 1.25, "learning_rate": 2.9119654547987185e-05, "loss": 1.6327, "step": 6000 }, { "epoch": 1.36, "learning_rate": 2.737846496726564e-05, "loss": 1.6368, "step": 6500 }, { "epoch": 1.46, "learning_rate": 2.563727538654409e-05, "loss": 1.6329, "step": 7000 }, { "epoch": 1.57, "learning_rate": 2.3899568184983983e-05, "loss": 1.6143, "step": 7500 }, { "epoch": 1.67, "learning_rate": 2.2158378604262433e-05, "loss": 1.6044, "step": 8000 }, { "epoch": 1.78, "learning_rate": 2.0417189023540884e-05, "loss": 1.6295, "step": 8500 }, { "epoch": 1.88, "learning_rate": 1.8675999442819337e-05, "loss": 1.6023, "step": 9000 }, { "epoch": 1.98, "learning_rate": 1.6934809862097788e-05, "loss": 1.6021, "step": 9500 }, { "epoch": 2.0, "eval_loss": 1.6306262016296387, "eval_runtime": 43.7183, "eval_samples_per_second": 10.408, "eval_steps_per_second": 1.304, "step": 9572 }, { "epoch": 2.09, "learning_rate": 1.5193620281376237e-05, "loss": 1.5842, "step": 10000 }, { "epoch": 2.19, "learning_rate": 1.3452430700654689e-05, "loss": 1.587, "step": 10500 }, { "epoch": 2.3, "learning_rate": 1.1714723499094582e-05, "loss": 1.5566, "step": 11000 }, { "epoch": 2.4, "learning_rate": 9.973533918373034e-06, "loss": 1.6022, "step": 11500 }, { "epoch": 2.51, "learning_rate": 8.232344337651483e-06, "loss": 1.5785, "step": 12000 }, { "epoch": 2.61, "learning_rate": 6.491154756929935e-06, "loss": 1.5775, "step": 12500 }, { "epoch": 2.72, "learning_rate": 4.753447555369829e-06, "loss": 1.5684, "step": 13000 }, { "epoch": 2.82, "learning_rate": 3.01225797464828e-06, "loss": 1.5719, "step": 13500 }, { "epoch": 2.93, "learning_rate": 1.2710683939267307e-06, "loss": 1.5819, "step": 14000 }, { "epoch": 3.0, "eval_loss": 1.6228220462799072, "eval_runtime": 42.6413, "eval_samples_per_second": 10.67, "eval_steps_per_second": 1.337, "step": 14358 }, { "epoch": 3.0, "step": 14358, "total_flos": 1.5006523981824e+16, "train_loss": 1.6758505107466766, "train_runtime": 8429.8505, "train_samples_per_second": 3.406, "train_steps_per_second": 1.703 } ], "max_steps": 14358, "num_train_epochs": 3, "total_flos": 1.5006523981824e+16, "trial_name": null, "trial_params": null }