{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9840848806366047, "eval_steps": 500, "global_step": 40500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.938599076530111e-05, "loss": 0.5537, "step": 500 }, { "epoch": 0.07, "learning_rate": 4.877198153060222e-05, "loss": 0.5484, "step": 1000 }, { "epoch": 0.11, "learning_rate": 4.815797229590333e-05, "loss": 0.5563, "step": 1500 }, { "epoch": 0.15, "learning_rate": 4.754396306120444e-05, "loss": 0.5339, "step": 2000 }, { "epoch": 0.18, "learning_rate": 4.692995382650555e-05, "loss": 0.5478, "step": 2500 }, { "epoch": 0.22, "learning_rate": 4.631594459180666e-05, "loss": 0.5462, "step": 3000 }, { "epoch": 0.26, "learning_rate": 4.5701935357107775e-05, "loss": 0.5555, "step": 3500 }, { "epoch": 0.29, "learning_rate": 4.508792612240889e-05, "loss": 0.5463, "step": 4000 }, { "epoch": 0.33, "learning_rate": 4.4473916887709995e-05, "loss": 0.5379, "step": 4500 }, { "epoch": 0.37, "learning_rate": 4.38599076530111e-05, "loss": 0.5305, "step": 5000 }, { "epoch": 0.41, "learning_rate": 4.3245898418312215e-05, "loss": 0.5564, "step": 5500 }, { "epoch": 0.44, "learning_rate": 4.263188918361332e-05, "loss": 0.5208, "step": 6000 }, { "epoch": 0.48, "learning_rate": 4.2017879948914435e-05, "loss": 0.5264, "step": 6500 }, { "epoch": 0.52, "learning_rate": 4.140387071421554e-05, "loss": 0.5595, "step": 7000 }, { "epoch": 0.55, "learning_rate": 4.0789861479516655e-05, "loss": 0.556, "step": 7500 }, { "epoch": 0.59, "learning_rate": 4.017585224481776e-05, "loss": 0.5514, "step": 8000 }, { "epoch": 0.63, "learning_rate": 3.9561843010118875e-05, "loss": 0.5334, "step": 8500 }, { "epoch": 0.66, "learning_rate": 3.894783377541998e-05, "loss": 0.5481, "step": 9000 }, { "epoch": 0.7, "learning_rate": 3.8333824540721095e-05, "loss": 0.5384, "step": 9500 }, { "epoch": 0.74, "learning_rate": 3.771981530602221e-05, "loss": 0.5356, "step": 10000 }, { "epoch": 0.77, "learning_rate": 3.7105806071323314e-05, "loss": 0.5258, "step": 10500 }, { "epoch": 0.81, "learning_rate": 3.649179683662443e-05, "loss": 0.5356, "step": 11000 }, { "epoch": 0.85, "learning_rate": 3.5877787601925534e-05, "loss": 0.5282, "step": 11500 }, { "epoch": 0.88, "learning_rate": 3.526377836722665e-05, "loss": 0.5322, "step": 12000 }, { "epoch": 0.92, "learning_rate": 3.4649769132527754e-05, "loss": 0.5364, "step": 12500 }, { "epoch": 0.96, "learning_rate": 3.403575989782887e-05, "loss": 0.5372, "step": 13000 }, { "epoch": 0.99, "learning_rate": 3.3421750663129974e-05, "loss": 0.5321, "step": 13500 }, { "epoch": 1.03, "learning_rate": 3.280774142843108e-05, "loss": 0.5254, "step": 14000 }, { "epoch": 1.07, "learning_rate": 3.2193732193732194e-05, "loss": 0.5168, "step": 14500 }, { "epoch": 1.11, "learning_rate": 3.15797229590333e-05, "loss": 0.5236, "step": 15000 }, { "epoch": 1.14, "learning_rate": 3.0965713724334414e-05, "loss": 0.5276, "step": 15500 }, { "epoch": 1.18, "learning_rate": 3.0351704489635524e-05, "loss": 0.511, "step": 16000 }, { "epoch": 1.22, "learning_rate": 2.9737695254936637e-05, "loss": 0.5144, "step": 16500 }, { "epoch": 1.25, "learning_rate": 2.9123686020237744e-05, "loss": 0.5215, "step": 17000 }, { "epoch": 1.29, "learning_rate": 2.8509676785538857e-05, "loss": 0.5143, "step": 17500 }, { "epoch": 1.33, "learning_rate": 2.7895667550839967e-05, "loss": 0.522, "step": 18000 }, { "epoch": 1.36, "learning_rate": 2.728165831614108e-05, "loss": 0.5258, "step": 18500 }, { "epoch": 1.4, "learning_rate": 2.6667649081442187e-05, "loss": 0.5236, "step": 19000 }, { "epoch": 1.44, "learning_rate": 2.6053639846743293e-05, "loss": 0.5124, "step": 19500 }, { "epoch": 1.47, "learning_rate": 2.5439630612044407e-05, "loss": 0.5134, "step": 20000 }, { "epoch": 1.51, "learning_rate": 2.4825621377345516e-05, "loss": 0.5123, "step": 20500 }, { "epoch": 1.55, "learning_rate": 2.4211612142646626e-05, "loss": 0.5157, "step": 21000 }, { "epoch": 1.58, "learning_rate": 2.3597602907947736e-05, "loss": 0.52, "step": 21500 }, { "epoch": 1.62, "learning_rate": 2.2983593673248846e-05, "loss": 0.5039, "step": 22000 }, { "epoch": 1.66, "learning_rate": 2.2369584438549956e-05, "loss": 0.5266, "step": 22500 }, { "epoch": 1.69, "learning_rate": 2.1755575203851066e-05, "loss": 0.5073, "step": 23000 }, { "epoch": 1.73, "learning_rate": 2.1141565969152176e-05, "loss": 0.5142, "step": 23500 }, { "epoch": 1.77, "learning_rate": 2.0527556734453286e-05, "loss": 0.5276, "step": 24000 }, { "epoch": 1.81, "learning_rate": 1.99135474997544e-05, "loss": 0.5156, "step": 24500 }, { "epoch": 1.84, "learning_rate": 1.929953826505551e-05, "loss": 0.5298, "step": 25000 }, { "epoch": 1.88, "learning_rate": 1.868552903035662e-05, "loss": 0.5183, "step": 25500 }, { "epoch": 1.92, "learning_rate": 1.8071519795657726e-05, "loss": 0.5152, "step": 26000 }, { "epoch": 1.95, "learning_rate": 1.7457510560958836e-05, "loss": 0.5025, "step": 26500 }, { "epoch": 1.99, "learning_rate": 1.6843501326259946e-05, "loss": 0.5113, "step": 27000 }, { "epoch": 2.03, "learning_rate": 1.622949209156106e-05, "loss": 0.512, "step": 27500 }, { "epoch": 2.06, "learning_rate": 1.561548285686217e-05, "loss": 0.5042, "step": 28000 }, { "epoch": 2.1, "learning_rate": 1.5001473622163279e-05, "loss": 0.5078, "step": 28500 }, { "epoch": 2.14, "learning_rate": 1.4387464387464389e-05, "loss": 0.5004, "step": 29000 }, { "epoch": 2.17, "learning_rate": 1.3773455152765499e-05, "loss": 0.5042, "step": 29500 }, { "epoch": 2.21, "learning_rate": 1.315944591806661e-05, "loss": 0.4991, "step": 30000 }, { "epoch": 2.25, "learning_rate": 1.2545436683367717e-05, "loss": 0.5156, "step": 30500 }, { "epoch": 2.28, "learning_rate": 1.1931427448668829e-05, "loss": 0.5001, "step": 31000 }, { "epoch": 2.32, "learning_rate": 1.1317418213969938e-05, "loss": 0.4968, "step": 31500 }, { "epoch": 2.36, "learning_rate": 1.0703408979271048e-05, "loss": 0.5152, "step": 32000 }, { "epoch": 2.39, "learning_rate": 1.0089399744572158e-05, "loss": 0.5, "step": 32500 }, { "epoch": 2.43, "learning_rate": 9.47539050987327e-06, "loss": 0.5144, "step": 33000 }, { "epoch": 2.47, "learning_rate": 8.86138127517438e-06, "loss": 0.5103, "step": 33500 }, { "epoch": 2.51, "learning_rate": 8.247372040475488e-06, "loss": 0.5108, "step": 34000 }, { "epoch": 2.54, "learning_rate": 7.6333628057766e-06, "loss": 0.4941, "step": 34500 }, { "epoch": 2.58, "learning_rate": 7.01935357107771e-06, "loss": 0.5047, "step": 35000 }, { "epoch": 2.62, "learning_rate": 6.40534433637882e-06, "loss": 0.5011, "step": 35500 }, { "epoch": 2.65, "learning_rate": 5.7913351016799295e-06, "loss": 0.5069, "step": 36000 }, { "epoch": 2.69, "learning_rate": 5.17732586698104e-06, "loss": 0.5051, "step": 36500 }, { "epoch": 2.73, "learning_rate": 4.563316632282149e-06, "loss": 0.5186, "step": 37000 }, { "epoch": 2.76, "learning_rate": 3.94930739758326e-06, "loss": 0.5167, "step": 37500 }, { "epoch": 2.8, "learning_rate": 3.3352981628843697e-06, "loss": 0.5023, "step": 38000 }, { "epoch": 2.84, "learning_rate": 2.72128892818548e-06, "loss": 0.5044, "step": 38500 }, { "epoch": 2.87, "learning_rate": 2.1072796934865904e-06, "loss": 0.5128, "step": 39000 }, { "epoch": 2.91, "learning_rate": 1.4932704587877004e-06, "loss": 0.4992, "step": 39500 }, { "epoch": 2.95, "learning_rate": 8.792612240888103e-07, "loss": 0.5009, "step": 40000 }, { "epoch": 2.98, "learning_rate": 2.6525198938992043e-07, "loss": 0.5029, "step": 40500 } ], "logging_steps": 500, "max_steps": 40716, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.0775395963109376e+17, "trial_name": null, "trial_params": null }