{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 742, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.999875511956753e-05, "loss": 1.7561, "step": 10 }, { "epoch": 0.03, "learning_rate": 9.999502054025923e-05, "loss": 1.7194, "step": 20 }, { "epoch": 0.04, "learning_rate": 9.998879644803927e-05, "loss": 1.7705, "step": 30 }, { "epoch": 0.05, "learning_rate": 9.998008315283767e-05, "loss": 1.5094, "step": 40 }, { "epoch": 0.07, "learning_rate": 9.996888108853486e-05, "loss": 1.5201, "step": 50 }, { "epoch": 0.08, "learning_rate": 9.995519081294009e-05, "loss": 1.637, "step": 60 }, { "epoch": 0.09, "learning_rate": 9.993901300776359e-05, "loss": 1.6338, "step": 70 }, { "epoch": 0.11, "learning_rate": 9.992034847858267e-05, "loss": 1.5496, "step": 80 }, { "epoch": 0.12, "learning_rate": 9.989919815480165e-05, "loss": 1.5669, "step": 90 }, { "epoch": 0.13, "learning_rate": 9.987556308960549e-05, "loss": 1.5622, "step": 100 }, { "epoch": 0.15, "learning_rate": 9.984944445990735e-05, "loss": 1.5836, "step": 110 }, { "epoch": 0.16, "learning_rate": 9.982084356629015e-05, "loss": 1.5877, "step": 120 }, { "epoch": 0.18, "learning_rate": 9.978976183294153e-05, "loss": 1.5465, "step": 130 }, { "epoch": 0.19, "learning_rate": 9.97562008075832e-05, "loss": 1.4834, "step": 140 }, { "epoch": 0.2, "learning_rate": 9.97201621613937e-05, "loss": 1.4956, "step": 150 }, { "epoch": 0.22, "learning_rate": 9.968164768892526e-05, "loss": 1.3877, "step": 160 }, { "epoch": 0.23, "learning_rate": 9.964065930801438e-05, "loss": 1.3466, "step": 170 }, { "epoch": 0.24, "learning_rate": 9.95971990596864e-05, "loss": 1.4023, "step": 180 }, { "epoch": 0.26, "learning_rate": 9.955126910805386e-05, "loss": 1.351, "step": 190 }, { "epoch": 0.27, "learning_rate": 9.950287174020863e-05, "loss": 1.424, "step": 200 }, { "epoch": 0.28, "learning_rate": 9.94520093661082e-05, "loss": 1.5038, "step": 210 }, { "epoch": 0.3, "learning_rate": 9.939868451845554e-05, "loss": 1.3436, "step": 220 }, { "epoch": 0.31, "learning_rate": 9.934289985257299e-05, "loss": 1.3446, "step": 230 }, { "epoch": 0.32, "learning_rate": 9.928465814627014e-05, "loss": 1.4063, "step": 240 }, { "epoch": 0.34, "learning_rate": 9.922396229970541e-05, "loss": 1.3803, "step": 250 }, { "epoch": 0.35, "learning_rate": 9.916081533524167e-05, "loss": 1.2975, "step": 260 }, { "epoch": 0.36, "learning_rate": 9.909522039729571e-05, "loss": 1.3689, "step": 270 }, { "epoch": 0.38, "learning_rate": 9.902718075218176e-05, "loss": 1.4446, "step": 280 }, { "epoch": 0.39, "learning_rate": 9.895669978794869e-05, "loss": 1.237, "step": 290 }, { "epoch": 0.4, "learning_rate": 9.888378101421147e-05, "loss": 1.333, "step": 300 }, { "epoch": 0.42, "learning_rate": 9.880842806197625e-05, "loss": 1.3933, "step": 310 }, { "epoch": 0.43, "learning_rate": 9.873064468345969e-05, "loss": 1.3735, "step": 320 }, { "epoch": 0.44, "learning_rate": 9.865043475190201e-05, "loss": 1.172, "step": 330 }, { "epoch": 0.46, "learning_rate": 9.856780226137419e-05, "loss": 1.2538, "step": 340 }, { "epoch": 0.47, "learning_rate": 9.848275132657903e-05, "loss": 1.2588, "step": 350 }, { "epoch": 0.49, "learning_rate": 9.839528618264633e-05, "loss": 1.3531, "step": 360 }, { "epoch": 0.5, "learning_rate": 9.830541118492193e-05, "loss": 1.4203, "step": 370 }, { "epoch": 0.51, "learning_rate": 9.821313080875088e-05, "loss": 1.4292, "step": 380 }, { "epoch": 0.53, "learning_rate": 9.811844964925454e-05, "loss": 1.3098, "step": 390 }, { "epoch": 0.54, "learning_rate": 9.802137242110185e-05, "loss": 1.2093, "step": 400 }, { "epoch": 0.55, "learning_rate": 9.792190395827447e-05, "loss": 1.2182, "step": 410 }, { "epoch": 0.57, "learning_rate": 9.782004921382612e-05, "loss": 1.3148, "step": 420 }, { "epoch": 0.58, "learning_rate": 9.771581325963594e-05, "loss": 1.4185, "step": 430 }, { "epoch": 0.59, "learning_rate": 9.760920128615591e-05, "loss": 1.409, "step": 440 }, { "epoch": 0.61, "learning_rate": 9.750021860215241e-05, "loss": 1.2089, "step": 450 }, { "epoch": 0.62, "learning_rate": 9.738887063444188e-05, "loss": 1.3114, "step": 460 }, { "epoch": 0.63, "learning_rate": 9.727516292762058e-05, "loss": 1.2528, "step": 470 }, { "epoch": 0.65, "learning_rate": 9.715910114378845e-05, "loss": 1.1718, "step": 480 }, { "epoch": 0.66, "learning_rate": 9.704069106226727e-05, "loss": 1.2825, "step": 490 }, { "epoch": 0.67, "learning_rate": 9.691993857931277e-05, "loss": 1.1629, "step": 500 }, { "epoch": 0.69, "learning_rate": 9.679684970782106e-05, "loss": 1.2463, "step": 510 }, { "epoch": 0.7, "learning_rate": 9.667143057702926e-05, "loss": 1.3297, "step": 520 }, { "epoch": 0.71, "learning_rate": 9.654368743221022e-05, "loss": 1.1995, "step": 530 }, { "epoch": 0.73, "learning_rate": 9.641362663436161e-05, "loss": 1.1135, "step": 540 }, { "epoch": 0.74, "learning_rate": 9.628125465988913e-05, "loss": 1.1311, "step": 550 }, { "epoch": 0.75, "learning_rate": 9.614657810028403e-05, "loss": 1.2915, "step": 560 }, { "epoch": 0.77, "learning_rate": 9.60096036617948e-05, "loss": 1.1898, "step": 570 }, { "epoch": 0.78, "learning_rate": 9.587033816509341e-05, "loss": 1.211, "step": 580 }, { "epoch": 0.8, "learning_rate": 9.572878854493553e-05, "loss": 1.1389, "step": 590 }, { "epoch": 0.81, "learning_rate": 9.558496184981525e-05, "loss": 1.0623, "step": 600 }, { "epoch": 0.82, "learning_rate": 9.543886524161409e-05, "loss": 1.1475, "step": 610 }, { "epoch": 0.84, "learning_rate": 9.529050599524443e-05, "loss": 1.0662, "step": 620 }, { "epoch": 0.85, "learning_rate": 9.513989149828718e-05, "loss": 1.2403, "step": 630 }, { "epoch": 0.86, "learning_rate": 9.498702925062393e-05, "loss": 1.3078, "step": 640 }, { "epoch": 0.88, "learning_rate": 9.48319268640635e-05, "loss": 1.2456, "step": 650 }, { "epoch": 0.89, "learning_rate": 9.467459206196298e-05, "loss": 1.1248, "step": 660 }, { "epoch": 0.9, "learning_rate": 9.451503267884299e-05, "loss": 1.0899, "step": 670 }, { "epoch": 0.92, "learning_rate": 9.435325665999771e-05, "loss": 1.3189, "step": 680 }, { "epoch": 0.93, "learning_rate": 9.418927206109913e-05, "loss": 0.9791, "step": 690 }, { "epoch": 0.94, "learning_rate": 9.402308704779599e-05, "loss": 1.112, "step": 700 }, { "epoch": 0.96, "learning_rate": 9.385470989530716e-05, "loss": 1.0378, "step": 710 }, { "epoch": 0.97, "learning_rate": 9.368414898800952e-05, "loss": 1.2503, "step": 720 }, { "epoch": 0.98, "learning_rate": 9.35114128190205e-05, "loss": 1.0507, "step": 730 }, { "epoch": 1.0, "learning_rate": 9.333650998977518e-05, "loss": 1.0279, "step": 740 } ], "logging_steps": 10, "max_steps": 4452, "num_train_epochs": 6, "save_steps": 500, "total_flos": 3877680160604160.0, "trial_name": null, "trial_params": null }