{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.756756756756757, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 4.996861268047709e-05, "loss": 2.8545, "step": 10 }, { "epoch": 0.27, "learning_rate": 4.9937225360954175e-05, "loss": 2.6973, "step": 20 }, { "epoch": 0.41, "learning_rate": 4.9905838041431265e-05, "loss": 2.8918, "step": 30 }, { "epoch": 0.54, "learning_rate": 4.987445072190835e-05, "loss": 2.7159, "step": 40 }, { "epoch": 0.68, "learning_rate": 4.984306340238544e-05, "loss": 2.681, "step": 50 }, { "epoch": 0.81, "learning_rate": 4.981167608286253e-05, "loss": 2.7932, "step": 60 }, { "epoch": 0.95, "learning_rate": 4.978028876333961e-05, "loss": 2.9297, "step": 70 }, { "epoch": 1.08, "learning_rate": 4.97489014438167e-05, "loss": 2.58, "step": 80 }, { "epoch": 1.22, "learning_rate": 4.971751412429379e-05, "loss": 2.7518, "step": 90 }, { "epoch": 1.35, "learning_rate": 4.968612680477088e-05, "loss": 2.8066, "step": 100 }, { "epoch": 1.49, "learning_rate": 4.965473948524796e-05, "loss": 2.6671, "step": 110 }, { "epoch": 1.62, "learning_rate": 4.962335216572505e-05, "loss": 2.7779, "step": 120 }, { "epoch": 1.76, "learning_rate": 4.959196484620214e-05, "loss": 2.4978, "step": 130 }, { "epoch": 1.89, "learning_rate": 4.956057752667922e-05, "loss": 2.5562, "step": 140 }, { "epoch": 2.03, "learning_rate": 4.952919020715631e-05, "loss": 2.7411, "step": 150 }, { "epoch": 2.16, "learning_rate": 4.9497802887633396e-05, "loss": 2.7523, "step": 160 }, { "epoch": 2.3, "learning_rate": 4.9466415568110485e-05, "loss": 2.6865, "step": 170 }, { "epoch": 2.43, "learning_rate": 4.9435028248587575e-05, "loss": 2.804, "step": 180 }, { "epoch": 2.57, "learning_rate": 4.940364092906466e-05, "loss": 3.051, "step": 190 }, { "epoch": 2.7, "learning_rate": 4.937225360954175e-05, "loss": 2.7102, "step": 200 }, { "epoch": 2.84, "learning_rate": 4.934086629001883e-05, "loss": 2.6128, "step": 210 }, { "epoch": 2.97, "learning_rate": 4.930947897049592e-05, "loss": 2.632, "step": 220 }, { "epoch": 3.11, "learning_rate": 4.927809165097301e-05, "loss": 2.5651, "step": 230 }, { "epoch": 3.24, "learning_rate": 4.924670433145009e-05, "loss": 2.8894, "step": 240 }, { "epoch": 3.38, "learning_rate": 4.921531701192718e-05, "loss": 2.7573, "step": 250 }, { "epoch": 3.51, "learning_rate": 4.918392969240427e-05, "loss": 2.4314, "step": 260 }, { "epoch": 3.65, "learning_rate": 4.915254237288136e-05, "loss": 2.6477, "step": 270 }, { "epoch": 3.78, "learning_rate": 4.9121155053358444e-05, "loss": 2.5796, "step": 280 }, { "epoch": 3.92, "learning_rate": 4.9089767733835534e-05, "loss": 2.6638, "step": 290 }, { "epoch": 4.05, "learning_rate": 4.9058380414312623e-05, "loss": 2.7746, "step": 300 }, { "epoch": 4.19, "learning_rate": 4.9026993094789706e-05, "loss": 2.8569, "step": 310 }, { "epoch": 4.32, "learning_rate": 4.8995605775266796e-05, "loss": 2.4447, "step": 320 }, { "epoch": 4.46, "learning_rate": 4.896421845574388e-05, "loss": 2.4985, "step": 330 }, { "epoch": 4.59, "learning_rate": 4.893283113622097e-05, "loss": 2.7694, "step": 340 }, { "epoch": 4.73, "learning_rate": 4.890144381669806e-05, "loss": 2.9069, "step": 350 }, { "epoch": 4.86, "learning_rate": 4.887005649717514e-05, "loss": 2.721, "step": 360 }, { "epoch": 5.0, "learning_rate": 4.883866917765223e-05, "loss": 2.5664, "step": 370 }, { "epoch": 5.14, "learning_rate": 4.8807281858129313e-05, "loss": 2.7768, "step": 380 }, { "epoch": 5.27, "learning_rate": 4.87758945386064e-05, "loss": 2.5988, "step": 390 }, { "epoch": 5.41, "learning_rate": 4.874450721908349e-05, "loss": 2.6285, "step": 400 }, { "epoch": 5.54, "learning_rate": 4.8713119899560576e-05, "loss": 2.6223, "step": 410 }, { "epoch": 5.68, "learning_rate": 4.8681732580037665e-05, "loss": 2.6413, "step": 420 }, { "epoch": 5.81, "learning_rate": 4.8650345260514755e-05, "loss": 2.4766, "step": 430 }, { "epoch": 5.95, "learning_rate": 4.8618957940991844e-05, "loss": 2.6344, "step": 440 }, { "epoch": 6.08, "learning_rate": 4.8587570621468934e-05, "loss": 2.6272, "step": 450 }, { "epoch": 6.22, "learning_rate": 4.855618330194602e-05, "loss": 2.6651, "step": 460 }, { "epoch": 6.35, "learning_rate": 4.8524795982423107e-05, "loss": 2.5442, "step": 470 }, { "epoch": 6.49, "learning_rate": 4.849340866290019e-05, "loss": 2.6415, "step": 480 }, { "epoch": 6.62, "learning_rate": 4.846202134337728e-05, "loss": 2.6057, "step": 490 }, { "epoch": 6.76, "learning_rate": 4.843063402385437e-05, "loss": 2.3711, "step": 500 } ], "logging_steps": 10, "max_steps": 15930, "num_train_epochs": 216, "save_steps": 500, "total_flos": 2.816032166019072e+16, "trial_name": null, "trial_params": null }