{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.756756756756757, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 4.996861268047709e-05, "loss": 2.8343, "step": 10 }, { "epoch": 0.27, "learning_rate": 4.9937225360954175e-05, "loss": 2.6082, "step": 20 }, { "epoch": 0.41, "learning_rate": 4.9905838041431265e-05, "loss": 2.7027, "step": 30 }, { "epoch": 0.54, "learning_rate": 4.987445072190835e-05, "loss": 2.5134, "step": 40 }, { "epoch": 0.68, "learning_rate": 4.984306340238544e-05, "loss": 2.413, "step": 50 }, { "epoch": 0.81, "learning_rate": 4.981167608286253e-05, "loss": 2.4923, "step": 60 }, { "epoch": 0.95, "learning_rate": 4.978028876333961e-05, "loss": 2.5383, "step": 70 }, { "epoch": 1.08, "learning_rate": 4.97489014438167e-05, "loss": 2.1952, "step": 80 }, { "epoch": 1.22, "learning_rate": 4.971751412429379e-05, "loss": 2.3365, "step": 90 }, { "epoch": 1.35, "learning_rate": 4.968612680477088e-05, "loss": 2.2645, "step": 100 }, { "epoch": 1.49, "learning_rate": 4.965473948524796e-05, "loss": 2.2224, "step": 110 }, { "epoch": 1.62, "learning_rate": 4.962335216572505e-05, "loss": 2.1858, "step": 120 }, { "epoch": 1.76, "learning_rate": 4.959196484620214e-05, "loss": 2.01, "step": 130 }, { "epoch": 1.89, "learning_rate": 4.956057752667922e-05, "loss": 2.0707, "step": 140 }, { "epoch": 2.03, "learning_rate": 4.952919020715631e-05, "loss": 2.1271, "step": 150 }, { "epoch": 2.16, "learning_rate": 4.9497802887633396e-05, "loss": 2.1912, "step": 160 }, { "epoch": 2.3, "learning_rate": 4.9466415568110485e-05, "loss": 1.9693, "step": 170 }, { "epoch": 2.43, "learning_rate": 4.9435028248587575e-05, "loss": 1.7561, "step": 180 }, { "epoch": 2.57, "learning_rate": 4.940364092906466e-05, "loss": 2.1115, "step": 190 }, { "epoch": 2.7, "learning_rate": 4.937225360954175e-05, "loss": 1.9374, "step": 200 }, { "epoch": 2.84, "learning_rate": 4.934086629001883e-05, "loss": 1.8973, "step": 210 }, { "epoch": 2.97, "learning_rate": 4.930947897049592e-05, "loss": 1.9069, "step": 220 }, { "epoch": 3.11, "learning_rate": 4.927809165097301e-05, "loss": 1.8768, "step": 230 }, { "epoch": 3.24, "learning_rate": 4.924670433145009e-05, "loss": 1.7559, "step": 240 }, { "epoch": 3.38, "learning_rate": 4.921531701192718e-05, "loss": 1.6955, "step": 250 }, { "epoch": 3.51, "learning_rate": 4.918392969240427e-05, "loss": 1.4821, "step": 260 }, { "epoch": 3.65, "learning_rate": 4.915254237288136e-05, "loss": 1.7175, "step": 270 }, { "epoch": 3.78, "learning_rate": 4.9121155053358444e-05, "loss": 1.6685, "step": 280 }, { "epoch": 3.92, "learning_rate": 4.9089767733835534e-05, "loss": 1.6313, "step": 290 }, { "epoch": 4.05, "learning_rate": 4.9058380414312623e-05, "loss": 1.6231, "step": 300 }, { "epoch": 4.19, "learning_rate": 4.9026993094789706e-05, "loss": 1.4119, "step": 310 }, { "epoch": 4.32, "learning_rate": 4.8995605775266796e-05, "loss": 1.2547, "step": 320 }, { "epoch": 4.46, "learning_rate": 4.896421845574388e-05, "loss": 1.3748, "step": 330 }, { "epoch": 4.59, "learning_rate": 4.893283113622097e-05, "loss": 1.3336, "step": 340 }, { "epoch": 4.73, "learning_rate": 4.890144381669806e-05, "loss": 1.4334, "step": 350 }, { "epoch": 4.86, "learning_rate": 4.887005649717514e-05, "loss": 1.4155, "step": 360 }, { "epoch": 5.0, "learning_rate": 4.883866917765223e-05, "loss": 1.4832, "step": 370 }, { "epoch": 5.14, "learning_rate": 4.8807281858129313e-05, "loss": 0.9712, "step": 380 }, { "epoch": 5.27, "learning_rate": 4.87758945386064e-05, "loss": 1.1672, "step": 390 }, { "epoch": 5.41, "learning_rate": 4.874450721908349e-05, "loss": 1.338, "step": 400 }, { "epoch": 5.54, "learning_rate": 4.8713119899560576e-05, "loss": 1.0129, "step": 410 }, { "epoch": 5.68, "learning_rate": 4.8681732580037665e-05, "loss": 1.1344, "step": 420 }, { "epoch": 5.81, "learning_rate": 4.8650345260514755e-05, "loss": 1.139, "step": 430 }, { "epoch": 5.95, "learning_rate": 4.8618957940991844e-05, "loss": 1.1943, "step": 440 }, { "epoch": 6.08, "learning_rate": 4.8587570621468934e-05, "loss": 0.9281, "step": 450 }, { "epoch": 6.22, "learning_rate": 4.855618330194602e-05, "loss": 0.9121, "step": 460 }, { "epoch": 6.35, "learning_rate": 4.8524795982423107e-05, "loss": 1.0089, "step": 470 }, { "epoch": 6.49, "learning_rate": 4.849340866290019e-05, "loss": 0.9739, "step": 480 }, { "epoch": 6.62, "learning_rate": 4.846202134337728e-05, "loss": 0.958, "step": 490 }, { "epoch": 6.76, "learning_rate": 4.843063402385437e-05, "loss": 0.7523, "step": 500 } ], "logging_steps": 10, "max_steps": 15930, "num_train_epochs": 216, "save_steps": 500, "total_flos": 2.821509238259712e+16, "trial_name": null, "trial_params": null }