{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 68478, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.963491924413681e-05, "loss": 2.5929, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.9269838488273604e-05, "loss": 2.2003, "step": 1000 }, { "epoch": 0.07, "learning_rate": 4.890475773241041e-05, "loss": 2.0692, "step": 1500 }, { "epoch": 0.09, "learning_rate": 4.853967697654722e-05, "loss": 1.9485, "step": 2000 }, { "epoch": 0.11, "learning_rate": 4.817459622068401e-05, "loss": 1.9084, "step": 2500 }, { "epoch": 0.13, "learning_rate": 4.780951546482082e-05, "loss": 1.8495, "step": 3000 }, { "epoch": 0.15, "learning_rate": 4.744443470895763e-05, "loss": 1.8025, "step": 3500 }, { "epoch": 0.18, "learning_rate": 4.707935395309443e-05, "loss": 1.7957, "step": 4000 }, { "epoch": 0.2, "learning_rate": 4.671427319723123e-05, "loss": 1.7414, "step": 4500 }, { "epoch": 0.22, "learning_rate": 4.634919244136804e-05, "loss": 1.7284, "step": 5000 }, { "epoch": 0.24, "learning_rate": 4.598411168550484e-05, "loss": 1.7033, "step": 5500 }, { "epoch": 0.26, "learning_rate": 4.561903092964164e-05, "loss": 1.705, "step": 6000 }, { "epoch": 0.28, "learning_rate": 4.525395017377844e-05, "loss": 1.667, "step": 6500 }, { "epoch": 0.31, "learning_rate": 4.4888869417915246e-05, "loss": 1.6606, "step": 7000 }, { "epoch": 0.33, "learning_rate": 4.452378866205205e-05, "loss": 1.6179, "step": 7500 }, { "epoch": 0.35, "learning_rate": 4.415870790618885e-05, "loss": 1.5977, "step": 8000 }, { "epoch": 0.37, "learning_rate": 4.3793627150325655e-05, "loss": 1.5827, "step": 8500 }, { "epoch": 0.39, "learning_rate": 4.3428546394462456e-05, "loss": 1.5621, "step": 9000 }, { "epoch": 0.42, "learning_rate": 4.306346563859926e-05, "loss": 1.5675, "step": 9500 }, { "epoch": 0.44, "learning_rate": 4.2698384882736064e-05, "loss": 1.5729, "step": 10000 }, { "epoch": 0.46, "learning_rate": 4.2333304126872865e-05, "loss": 1.546, "step": 10500 }, { "epoch": 0.48, "learning_rate": 4.1968223371009666e-05, "loss": 1.5546, "step": 11000 }, { "epoch": 0.5, "learning_rate": 4.160314261514647e-05, "loss": 1.5163, "step": 11500 }, { "epoch": 0.53, "learning_rate": 4.123806185928328e-05, "loss": 1.5097, "step": 12000 }, { "epoch": 0.55, "learning_rate": 4.0872981103420075e-05, "loss": 1.5074, "step": 12500 }, { "epoch": 0.57, "learning_rate": 4.050790034755688e-05, "loss": 1.4856, "step": 13000 }, { "epoch": 0.59, "learning_rate": 4.014281959169369e-05, "loss": 1.5068, "step": 13500 }, { "epoch": 0.61, "learning_rate": 3.9777738835830483e-05, "loss": 1.5035, "step": 14000 }, { "epoch": 0.64, "learning_rate": 3.941265807996729e-05, "loss": 1.4676, "step": 14500 }, { "epoch": 0.66, "learning_rate": 3.90475773241041e-05, "loss": 1.4623, "step": 15000 }, { "epoch": 0.68, "learning_rate": 3.868249656824089e-05, "loss": 1.4472, "step": 15500 }, { "epoch": 0.7, "learning_rate": 3.83174158123777e-05, "loss": 1.447, "step": 16000 }, { "epoch": 0.72, "learning_rate": 3.795233505651451e-05, "loss": 1.4557, "step": 16500 }, { "epoch": 0.74, "learning_rate": 3.75872543006513e-05, "loss": 1.4439, "step": 17000 }, { "epoch": 0.77, "learning_rate": 3.722217354478811e-05, "loss": 1.4332, "step": 17500 }, { "epoch": 0.79, "learning_rate": 3.685709278892491e-05, "loss": 1.4365, "step": 18000 }, { "epoch": 0.81, "learning_rate": 3.649201203306172e-05, "loss": 1.4098, "step": 18500 }, { "epoch": 0.83, "learning_rate": 3.612693127719852e-05, "loss": 1.4125, "step": 19000 }, { "epoch": 0.85, "learning_rate": 3.576185052133532e-05, "loss": 1.4088, "step": 19500 }, { "epoch": 0.88, "learning_rate": 3.5396769765472126e-05, "loss": 1.3968, "step": 20000 }, { "epoch": 0.9, "learning_rate": 3.503168900960893e-05, "loss": 1.4125, "step": 20500 }, { "epoch": 0.92, "learning_rate": 3.466660825374573e-05, "loss": 1.385, "step": 21000 }, { "epoch": 0.94, "learning_rate": 3.4301527497882535e-05, "loss": 1.3985, "step": 21500 }, { "epoch": 0.96, "learning_rate": 3.3936446742019336e-05, "loss": 1.3747, "step": 22000 }, { "epoch": 0.99, "learning_rate": 3.3571365986156136e-05, "loss": 1.3743, "step": 22500 }, { "epoch": 1.01, "learning_rate": 3.3206285230292944e-05, "loss": 1.3065, "step": 23000 }, { "epoch": 1.03, "learning_rate": 3.2841204474429745e-05, "loss": 1.1883, "step": 23500 }, { "epoch": 1.05, "learning_rate": 3.2476123718566545e-05, "loss": 1.2017, "step": 24000 }, { "epoch": 1.07, "learning_rate": 3.211104296270335e-05, "loss": 1.1919, "step": 24500 }, { "epoch": 1.1, "learning_rate": 3.1745962206840153e-05, "loss": 1.1873, "step": 25000 }, { "epoch": 1.12, "learning_rate": 3.1380881450976954e-05, "loss": 1.2045, "step": 25500 }, { "epoch": 1.14, "learning_rate": 3.101580069511376e-05, "loss": 1.1915, "step": 26000 }, { "epoch": 1.16, "learning_rate": 3.065071993925057e-05, "loss": 1.1932, "step": 26500 }, { "epoch": 1.18, "learning_rate": 3.0285639183387367e-05, "loss": 1.1793, "step": 27000 }, { "epoch": 1.2, "learning_rate": 2.992055842752417e-05, "loss": 1.2086, "step": 27500 }, { "epoch": 1.23, "learning_rate": 2.9555477671660975e-05, "loss": 1.2023, "step": 28000 }, { "epoch": 1.25, "learning_rate": 2.9190396915797775e-05, "loss": 1.1953, "step": 28500 }, { "epoch": 1.27, "learning_rate": 2.882531615993458e-05, "loss": 1.2049, "step": 29000 }, { "epoch": 1.29, "learning_rate": 2.8460235404071384e-05, "loss": 1.1931, "step": 29500 }, { "epoch": 1.31, "learning_rate": 2.8095154648208184e-05, "loss": 1.197, "step": 30000 }, { "epoch": 1.34, "learning_rate": 2.773007389234499e-05, "loss": 1.1704, "step": 30500 }, { "epoch": 1.36, "learning_rate": 2.7364993136481793e-05, "loss": 1.1548, "step": 31000 }, { "epoch": 1.38, "learning_rate": 2.6999912380618593e-05, "loss": 1.187, "step": 31500 }, { "epoch": 1.4, "learning_rate": 2.6634831624755397e-05, "loss": 1.1769, "step": 32000 }, { "epoch": 1.42, "learning_rate": 2.62697508688922e-05, "loss": 1.181, "step": 32500 }, { "epoch": 1.45, "learning_rate": 2.5904670113029006e-05, "loss": 1.1564, "step": 33000 }, { "epoch": 1.47, "learning_rate": 2.5539589357165806e-05, "loss": 1.1742, "step": 33500 }, { "epoch": 1.49, "learning_rate": 2.517450860130261e-05, "loss": 1.1651, "step": 34000 }, { "epoch": 1.51, "learning_rate": 2.480942784543941e-05, "loss": 1.1637, "step": 34500 }, { "epoch": 1.53, "learning_rate": 2.4444347089576215e-05, "loss": 1.1531, "step": 35000 }, { "epoch": 1.56, "learning_rate": 2.407926633371302e-05, "loss": 1.1703, "step": 35500 }, { "epoch": 1.58, "learning_rate": 2.371418557784982e-05, "loss": 1.1482, "step": 36000 }, { "epoch": 1.6, "learning_rate": 2.3349104821986624e-05, "loss": 1.1501, "step": 36500 }, { "epoch": 1.62, "learning_rate": 2.2984024066123428e-05, "loss": 1.1525, "step": 37000 }, { "epoch": 1.64, "learning_rate": 2.261894331026023e-05, "loss": 1.1576, "step": 37500 }, { "epoch": 1.66, "learning_rate": 2.2253862554397033e-05, "loss": 1.1518, "step": 38000 }, { "epoch": 1.69, "learning_rate": 2.1888781798533837e-05, "loss": 1.1594, "step": 38500 }, { "epoch": 1.71, "learning_rate": 2.152370104267064e-05, "loss": 1.1614, "step": 39000 }, { "epoch": 1.73, "learning_rate": 2.1158620286807442e-05, "loss": 1.185, "step": 39500 }, { "epoch": 1.75, "learning_rate": 2.0793539530944246e-05, "loss": 1.1399, "step": 40000 }, { "epoch": 1.77, "learning_rate": 2.042845877508105e-05, "loss": 1.1262, "step": 40500 }, { "epoch": 1.8, "learning_rate": 2.006337801921785e-05, "loss": 1.1458, "step": 41000 }, { "epoch": 1.82, "learning_rate": 1.9698297263354655e-05, "loss": 1.1379, "step": 41500 }, { "epoch": 1.84, "learning_rate": 1.933321650749146e-05, "loss": 1.1437, "step": 42000 }, { "epoch": 1.86, "learning_rate": 1.896813575162826e-05, "loss": 1.1161, "step": 42500 }, { "epoch": 1.88, "learning_rate": 1.8603054995765064e-05, "loss": 1.1205, "step": 43000 }, { "epoch": 1.91, "learning_rate": 1.8237974239901868e-05, "loss": 1.1422, "step": 43500 }, { "epoch": 1.93, "learning_rate": 1.787289348403867e-05, "loss": 1.1265, "step": 44000 }, { "epoch": 1.95, "learning_rate": 1.7507812728175473e-05, "loss": 1.0999, "step": 44500 }, { "epoch": 1.97, "learning_rate": 1.7142731972312277e-05, "loss": 1.11, "step": 45000 }, { "epoch": 1.99, "learning_rate": 1.677765121644908e-05, "loss": 1.1267, "step": 45500 }, { "epoch": 2.02, "learning_rate": 1.6412570460585882e-05, "loss": 1.0115, "step": 46000 }, { "epoch": 2.04, "learning_rate": 1.6047489704722686e-05, "loss": 0.9535, "step": 46500 }, { "epoch": 2.06, "learning_rate": 1.568240894885949e-05, "loss": 0.9458, "step": 47000 }, { "epoch": 2.08, "learning_rate": 1.531732819299629e-05, "loss": 0.9517, "step": 47500 }, { "epoch": 2.1, "learning_rate": 1.4952247437133093e-05, "loss": 0.9496, "step": 48000 }, { "epoch": 2.12, "learning_rate": 1.4587166681269897e-05, "loss": 0.9649, "step": 48500 }, { "epoch": 2.15, "learning_rate": 1.42220859254067e-05, "loss": 0.934, "step": 49000 }, { "epoch": 2.17, "learning_rate": 1.3857005169543506e-05, "loss": 0.9757, "step": 49500 }, { "epoch": 2.19, "learning_rate": 1.3491924413680306e-05, "loss": 0.9349, "step": 50000 }, { "epoch": 2.21, "learning_rate": 1.3126843657817109e-05, "loss": 0.9548, "step": 50500 }, { "epoch": 2.23, "learning_rate": 1.2761762901953914e-05, "loss": 0.9426, "step": 51000 }, { "epoch": 2.26, "learning_rate": 1.2396682146090715e-05, "loss": 0.9543, "step": 51500 }, { "epoch": 2.28, "learning_rate": 1.203160139022752e-05, "loss": 0.9438, "step": 52000 }, { "epoch": 2.3, "learning_rate": 1.1666520634364322e-05, "loss": 0.963, "step": 52500 }, { "epoch": 2.32, "learning_rate": 1.1301439878501124e-05, "loss": 0.9334, "step": 53000 }, { "epoch": 2.34, "learning_rate": 1.0936359122637928e-05, "loss": 0.9526, "step": 53500 }, { "epoch": 2.37, "learning_rate": 1.057127836677473e-05, "loss": 0.9397, "step": 54000 }, { "epoch": 2.39, "learning_rate": 1.0206197610911535e-05, "loss": 0.9569, "step": 54500 }, { "epoch": 2.41, "learning_rate": 9.841116855048337e-06, "loss": 0.94, "step": 55000 }, { "epoch": 2.43, "learning_rate": 9.47603609918514e-06, "loss": 0.9345, "step": 55500 }, { "epoch": 2.45, "learning_rate": 9.110955343321944e-06, "loss": 0.933, "step": 56000 }, { "epoch": 2.48, "learning_rate": 8.745874587458746e-06, "loss": 0.9289, "step": 56500 }, { "epoch": 2.5, "learning_rate": 8.380793831595548e-06, "loss": 0.9581, "step": 57000 }, { "epoch": 2.52, "learning_rate": 8.015713075732353e-06, "loss": 0.9203, "step": 57500 }, { "epoch": 2.54, "learning_rate": 7.650632319869155e-06, "loss": 0.9539, "step": 58000 }, { "epoch": 2.56, "learning_rate": 7.285551564005959e-06, "loss": 0.9482, "step": 58500 }, { "epoch": 2.58, "learning_rate": 6.920470808142762e-06, "loss": 0.9206, "step": 59000 }, { "epoch": 2.61, "learning_rate": 6.555390052279564e-06, "loss": 0.9186, "step": 59500 }, { "epoch": 2.63, "learning_rate": 6.190309296416367e-06, "loss": 0.9168, "step": 60000 }, { "epoch": 2.65, "learning_rate": 5.825228540553171e-06, "loss": 0.9151, "step": 60500 }, { "epoch": 2.67, "learning_rate": 5.460147784689974e-06, "loss": 0.924, "step": 61000 }, { "epoch": 2.69, "learning_rate": 5.095067028826777e-06, "loss": 0.916, "step": 61500 }, { "epoch": 2.72, "learning_rate": 4.729986272963579e-06, "loss": 0.9311, "step": 62000 }, { "epoch": 2.74, "learning_rate": 4.364905517100383e-06, "loss": 0.9175, "step": 62500 }, { "epoch": 2.76, "learning_rate": 3.999824761237186e-06, "loss": 0.9121, "step": 63000 }, { "epoch": 2.78, "learning_rate": 3.634744005373989e-06, "loss": 0.9226, "step": 63500 }, { "epoch": 2.8, "learning_rate": 3.269663249510792e-06, "loss": 0.9335, "step": 64000 }, { "epoch": 2.83, "learning_rate": 2.9045824936475953e-06, "loss": 0.9104, "step": 64500 }, { "epoch": 2.85, "learning_rate": 2.539501737784398e-06, "loss": 0.9197, "step": 65000 }, { "epoch": 2.87, "learning_rate": 2.1744209819212013e-06, "loss": 0.8976, "step": 65500 }, { "epoch": 2.89, "learning_rate": 1.8093402260580042e-06, "loss": 0.9401, "step": 66000 }, { "epoch": 2.91, "learning_rate": 1.444259470194807e-06, "loss": 0.9265, "step": 66500 }, { "epoch": 2.94, "learning_rate": 1.07917871433161e-06, "loss": 0.9255, "step": 67000 }, { "epoch": 2.96, "learning_rate": 7.140979584684132e-07, "loss": 0.9099, "step": 67500 }, { "epoch": 2.98, "learning_rate": 3.490172026052163e-07, "loss": 0.9033, "step": 68000 }, { "epoch": 3.0, "step": 68478, "total_flos": 9.218136679415808e+16, "train_loss": 1.233144689885432, "train_runtime": 17748.5921, "train_samples_per_second": 38.582, "train_steps_per_second": 3.858 } ], "max_steps": 68478, "num_train_epochs": 3, "total_flos": 9.218136679415808e+16, "trial_name": null, "trial_params": null }