{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 58671, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9829558044008114e-05, "loss": 6.2073, "step": 500 }, { "epoch": 0.05, "learning_rate": 1.9659116088016227e-05, "loss": 5.3027, "step": 1000 }, { "epoch": 0.08, "learning_rate": 1.948867413202434e-05, "loss": 5.0065, "step": 1500 }, { "epoch": 0.1, "learning_rate": 1.9318232176032456e-05, "loss": 4.808, "step": 2000 }, { "epoch": 0.13, "learning_rate": 1.9147790220040568e-05, "loss": 4.6909, "step": 2500 }, { "epoch": 0.15, "learning_rate": 1.897734826404868e-05, "loss": 4.575, "step": 3000 }, { "epoch": 0.18, "learning_rate": 1.8806906308056793e-05, "loss": 4.4813, "step": 3500 }, { "epoch": 0.2, "learning_rate": 1.8636464352064906e-05, "loss": 4.4051, "step": 4000 }, { "epoch": 0.23, "learning_rate": 1.846602239607302e-05, "loss": 4.3434, "step": 4500 }, { "epoch": 0.26, "learning_rate": 1.829558044008113e-05, "loss": 4.2976, "step": 5000 }, { "epoch": 0.28, "learning_rate": 1.8125138484089244e-05, "loss": 4.2479, "step": 5500 }, { "epoch": 0.31, "learning_rate": 1.7954696528097357e-05, "loss": 4.2003, "step": 6000 }, { "epoch": 0.33, "learning_rate": 1.778425457210547e-05, "loss": 4.1561, "step": 6500 }, { "epoch": 0.36, "learning_rate": 1.7613812616113585e-05, "loss": 4.1258, "step": 7000 }, { "epoch": 0.38, "learning_rate": 1.7443370660121698e-05, "loss": 4.0757, "step": 7500 }, { "epoch": 0.41, "learning_rate": 1.727292870412981e-05, "loss": 4.049, "step": 8000 }, { "epoch": 0.43, "learning_rate": 1.7102486748137923e-05, "loss": 4.0258, "step": 8500 }, { "epoch": 0.46, "learning_rate": 1.6932044792146036e-05, "loss": 3.9749, "step": 9000 }, { "epoch": 0.49, "learning_rate": 1.676160283615415e-05, "loss": 3.9751, "step": 9500 }, { "epoch": 0.51, "learning_rate": 1.659116088016226e-05, "loss": 3.9302, "step": 10000 }, { "epoch": 0.54, "learning_rate": 1.6420718924170377e-05, "loss": 3.9167, "step": 10500 }, { "epoch": 0.56, "learning_rate": 1.625027696817849e-05, "loss": 3.8915, "step": 11000 }, { "epoch": 0.59, "learning_rate": 1.6079835012186602e-05, "loss": 3.8704, "step": 11500 }, { "epoch": 0.61, "learning_rate": 1.5909393056194715e-05, "loss": 3.8542, "step": 12000 }, { "epoch": 0.64, "learning_rate": 1.5738951100202828e-05, "loss": 3.8171, "step": 12500 }, { "epoch": 0.66, "learning_rate": 1.556850914421094e-05, "loss": 3.8117, "step": 13000 }, { "epoch": 0.69, "learning_rate": 1.5398067188219053e-05, "loss": 3.7954, "step": 13500 }, { "epoch": 0.72, "learning_rate": 1.5227625232227166e-05, "loss": 3.7836, "step": 14000 }, { "epoch": 0.74, "learning_rate": 1.5057183276235278e-05, "loss": 3.7632, "step": 14500 }, { "epoch": 0.77, "learning_rate": 1.4886741320243392e-05, "loss": 3.7434, "step": 15000 }, { "epoch": 0.79, "learning_rate": 1.4716299364251505e-05, "loss": 3.7308, "step": 15500 }, { "epoch": 0.82, "learning_rate": 1.4545857408259618e-05, "loss": 3.7129, "step": 16000 }, { "epoch": 0.84, "learning_rate": 1.4375415452267732e-05, "loss": 3.7043, "step": 16500 }, { "epoch": 0.87, "learning_rate": 1.4204973496275845e-05, "loss": 3.6853, "step": 17000 }, { "epoch": 0.89, "learning_rate": 1.4034531540283957e-05, "loss": 3.6935, "step": 17500 }, { "epoch": 0.92, "learning_rate": 1.3864089584292072e-05, "loss": 3.6671, "step": 18000 }, { "epoch": 0.95, "learning_rate": 1.3693647628300184e-05, "loss": 3.6662, "step": 18500 }, { "epoch": 0.97, "learning_rate": 1.3523205672308297e-05, "loss": 3.6511, "step": 19000 }, { "epoch": 1.0, "learning_rate": 1.335276371631641e-05, "loss": 3.6389, "step": 19500 }, { "epoch": 1.02, "learning_rate": 1.3182321760324524e-05, "loss": 3.6311, "step": 20000 }, { "epoch": 1.05, "learning_rate": 1.3011879804332637e-05, "loss": 3.6067, "step": 20500 }, { "epoch": 1.07, "learning_rate": 1.2841437848340747e-05, "loss": 3.589, "step": 21000 }, { "epoch": 1.1, "learning_rate": 1.267099589234886e-05, "loss": 3.5954, "step": 21500 }, { "epoch": 1.12, "learning_rate": 1.2500553936356974e-05, "loss": 3.5915, "step": 22000 }, { "epoch": 1.15, "learning_rate": 1.2330111980365087e-05, "loss": 3.5764, "step": 22500 }, { "epoch": 1.18, "learning_rate": 1.21596700243732e-05, "loss": 3.5697, "step": 23000 }, { "epoch": 1.2, "learning_rate": 1.1989228068381314e-05, "loss": 3.5774, "step": 23500 }, { "epoch": 1.23, "learning_rate": 1.1818786112389427e-05, "loss": 3.5531, "step": 24000 }, { "epoch": 1.25, "learning_rate": 1.164834415639754e-05, "loss": 3.5561, "step": 24500 }, { "epoch": 1.28, "learning_rate": 1.1477902200405654e-05, "loss": 3.5495, "step": 25000 }, { "epoch": 1.3, "learning_rate": 1.1307460244413766e-05, "loss": 3.5391, "step": 25500 }, { "epoch": 1.33, "learning_rate": 1.1137018288421879e-05, "loss": 3.5351, "step": 26000 }, { "epoch": 1.36, "learning_rate": 1.0966576332429991e-05, "loss": 3.5234, "step": 26500 }, { "epoch": 1.38, "learning_rate": 1.0796134376438106e-05, "loss": 3.5186, "step": 27000 }, { "epoch": 1.41, "learning_rate": 1.0625692420446218e-05, "loss": 3.5007, "step": 27500 }, { "epoch": 1.43, "learning_rate": 1.0455250464454331e-05, "loss": 3.49, "step": 28000 }, { "epoch": 1.46, "learning_rate": 1.0284808508462445e-05, "loss": 3.4919, "step": 28500 }, { "epoch": 1.48, "learning_rate": 1.0114366552470556e-05, "loss": 3.4896, "step": 29000 }, { "epoch": 1.51, "learning_rate": 9.94392459647867e-06, "loss": 3.4825, "step": 29500 }, { "epoch": 1.53, "learning_rate": 9.773482640486783e-06, "loss": 3.4936, "step": 30000 }, { "epoch": 1.56, "learning_rate": 9.603040684494896e-06, "loss": 3.478, "step": 30500 }, { "epoch": 1.59, "learning_rate": 9.43259872850301e-06, "loss": 3.4659, "step": 31000 }, { "epoch": 1.61, "learning_rate": 9.262156772511121e-06, "loss": 3.4502, "step": 31500 }, { "epoch": 1.64, "learning_rate": 9.091714816519236e-06, "loss": 3.4523, "step": 32000 }, { "epoch": 1.66, "learning_rate": 8.921272860527348e-06, "loss": 3.4599, "step": 32500 }, { "epoch": 1.69, "learning_rate": 8.75083090453546e-06, "loss": 3.4489, "step": 33000 }, { "epoch": 1.71, "learning_rate": 8.580388948543575e-06, "loss": 3.4568, "step": 33500 }, { "epoch": 1.74, "learning_rate": 8.409946992551688e-06, "loss": 3.4514, "step": 34000 }, { "epoch": 1.76, "learning_rate": 8.2395050365598e-06, "loss": 3.4451, "step": 34500 }, { "epoch": 1.79, "learning_rate": 8.069063080567913e-06, "loss": 3.4447, "step": 35000 }, { "epoch": 1.82, "learning_rate": 7.898621124576026e-06, "loss": 3.4381, "step": 35500 }, { "epoch": 1.84, "learning_rate": 7.728179168584138e-06, "loss": 3.4288, "step": 36000 }, { "epoch": 1.87, "learning_rate": 7.557737212592252e-06, "loss": 3.4213, "step": 36500 }, { "epoch": 1.89, "learning_rate": 7.387295256600365e-06, "loss": 3.4281, "step": 37000 }, { "epoch": 1.92, "learning_rate": 7.216853300608479e-06, "loss": 3.4222, "step": 37500 }, { "epoch": 1.94, "learning_rate": 7.046411344616591e-06, "loss": 3.4239, "step": 38000 }, { "epoch": 1.97, "learning_rate": 6.875969388624705e-06, "loss": 3.3998, "step": 38500 }, { "epoch": 1.99, "learning_rate": 6.705527432632818e-06, "loss": 3.418, "step": 39000 }, { "epoch": 2.02, "learning_rate": 6.53508547664093e-06, "loss": 3.3867, "step": 39500 }, { "epoch": 2.05, "learning_rate": 6.364643520649043e-06, "loss": 3.3929, "step": 40000 }, { "epoch": 2.07, "learning_rate": 6.194201564657156e-06, "loss": 3.3855, "step": 40500 }, { "epoch": 2.1, "learning_rate": 6.02375960866527e-06, "loss": 3.3964, "step": 41000 }, { "epoch": 2.12, "learning_rate": 5.853317652673382e-06, "loss": 3.3931, "step": 41500 }, { "epoch": 2.15, "learning_rate": 5.682875696681496e-06, "loss": 3.3802, "step": 42000 }, { "epoch": 2.17, "learning_rate": 5.512433740689609e-06, "loss": 3.3849, "step": 42500 }, { "epoch": 2.2, "learning_rate": 5.341991784697722e-06, "loss": 3.3844, "step": 43000 }, { "epoch": 2.22, "learning_rate": 5.171549828705834e-06, "loss": 3.3835, "step": 43500 }, { "epoch": 2.25, "learning_rate": 5.001107872713947e-06, "loss": 3.379, "step": 44000 }, { "epoch": 2.28, "learning_rate": 4.830665916722061e-06, "loss": 3.3698, "step": 44500 }, { "epoch": 2.3, "learning_rate": 4.660223960730173e-06, "loss": 3.3895, "step": 45000 }, { "epoch": 2.33, "learning_rate": 4.489782004738287e-06, "loss": 3.3659, "step": 45500 }, { "epoch": 2.35, "learning_rate": 4.3193400487464e-06, "loss": 3.3576, "step": 46000 }, { "epoch": 2.38, "learning_rate": 4.148898092754513e-06, "loss": 3.359, "step": 46500 }, { "epoch": 2.4, "learning_rate": 3.9784561367626255e-06, "loss": 3.3535, "step": 47000 }, { "epoch": 2.43, "learning_rate": 3.808014180770739e-06, "loss": 3.3653, "step": 47500 }, { "epoch": 2.45, "learning_rate": 3.637572224778852e-06, "loss": 3.3563, "step": 48000 }, { "epoch": 2.48, "learning_rate": 3.467130268786965e-06, "loss": 3.3519, "step": 48500 }, { "epoch": 2.51, "learning_rate": 3.2966883127950777e-06, "loss": 3.3573, "step": 49000 }, { "epoch": 2.53, "learning_rate": 3.1262463568031908e-06, "loss": 3.3555, "step": 49500 }, { "epoch": 2.56, "learning_rate": 2.955804400811304e-06, "loss": 3.3708, "step": 50000 }, { "epoch": 2.58, "learning_rate": 2.7853624448194173e-06, "loss": 3.369, "step": 50500 }, { "epoch": 2.61, "learning_rate": 2.61492048882753e-06, "loss": 3.3409, "step": 51000 }, { "epoch": 2.63, "learning_rate": 2.444478532835643e-06, "loss": 3.36, "step": 51500 }, { "epoch": 2.66, "learning_rate": 2.274036576843756e-06, "loss": 3.3517, "step": 52000 }, { "epoch": 2.68, "learning_rate": 2.103594620851869e-06, "loss": 3.3437, "step": 52500 }, { "epoch": 2.71, "learning_rate": 1.933152664859982e-06, "loss": 3.3288, "step": 53000 }, { "epoch": 2.74, "learning_rate": 1.762710708868095e-06, "loss": 3.3543, "step": 53500 }, { "epoch": 2.76, "learning_rate": 1.5922687528762083e-06, "loss": 3.3522, "step": 54000 }, { "epoch": 2.79, "learning_rate": 1.4218267968843211e-06, "loss": 3.3379, "step": 54500 }, { "epoch": 2.81, "learning_rate": 1.2513848408924342e-06, "loss": 3.343, "step": 55000 }, { "epoch": 2.84, "learning_rate": 1.0809428849005472e-06, "loss": 3.335, "step": 55500 }, { "epoch": 2.86, "learning_rate": 9.105009289086603e-07, "loss": 3.3479, "step": 56000 }, { "epoch": 2.89, "learning_rate": 7.400589729167732e-07, "loss": 3.3421, "step": 56500 }, { "epoch": 2.91, "learning_rate": 5.696170169248863e-07, "loss": 3.3387, "step": 57000 }, { "epoch": 2.94, "learning_rate": 3.991750609329993e-07, "loss": 3.34, "step": 57500 }, { "epoch": 2.97, "learning_rate": 2.2873310494111234e-07, "loss": 3.3498, "step": 58000 }, { "epoch": 2.99, "learning_rate": 5.829114894922535e-08, "loss": 3.3345, "step": 58500 }, { "epoch": 3.0, "step": 58671, "total_flos": 0, "train_runtime": 13443.7441, "train_samples_per_second": 4.364 } ], "max_steps": 58671, "num_train_epochs": 3, "total_flos": 0, "trial_name": null, "trial_params": null }