{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.999072786277237, "global_step": 1078, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.998938447446803e-05, "loss": 2.0078, "step": 10 }, { "epoch": 0.04, "learning_rate": 4.9957546913022665e-05, "loss": 1.8414, "step": 20 }, { "epoch": 0.06, "learning_rate": 4.9904514353459654e-05, "loss": 1.7763, "step": 30 }, { "epoch": 0.07, "learning_rate": 4.983033183325818e-05, "loss": 1.7278, "step": 40 }, { "epoch": 0.09, "learning_rate": 4.973506235133323e-05, "loss": 1.7139, "step": 50 }, { "epoch": 0.11, "learning_rate": 4.96313569658781e-05, "loss": 1.6947, "step": 60 }, { "epoch": 0.13, "learning_rate": 4.94962599008322e-05, "loss": 1.6749, "step": 70 }, { "epoch": 0.15, "learning_rate": 4.9340359581993066e-05, "loss": 1.6579, "step": 80 }, { "epoch": 0.17, "learning_rate": 4.916378840646592e-05, "loss": 1.6499, "step": 90 }, { "epoch": 0.19, "learning_rate": 4.8966696325916515e-05, "loss": 1.6507, "step": 100 }, { "epoch": 0.2, "learning_rate": 4.8749250719225915e-05, "loss": 1.6426, "step": 110 }, { "epoch": 0.22, "learning_rate": 4.8511636250345294e-05, "loss": 1.6288, "step": 120 }, { "epoch": 0.24, "learning_rate": 4.825405471147153e-05, "loss": 1.6265, "step": 130 }, { "epoch": 0.26, "learning_rate": 4.797672485167683e-05, "loss": 1.6252, "step": 140 }, { "epoch": 0.28, "learning_rate": 4.7679882191137804e-05, "loss": 1.6218, "step": 150 }, { "epoch": 0.3, "learning_rate": 4.7363778821121784e-05, "loss": 1.6146, "step": 160 }, { "epoch": 0.32, "learning_rate": 4.702868318990039e-05, "loss": 1.604, "step": 170 }, { "epoch": 0.33, "learning_rate": 4.6674879874771926e-05, "loss": 1.607, "step": 180 }, { "epoch": 0.35, "learning_rate": 4.630266934038642e-05, "loss": 1.5972, "step": 190 }, { "epoch": 0.37, "learning_rate": 4.591236768357833e-05, "loss": 1.5973, "step": 200 }, { "epoch": 0.39, "learning_rate": 4.55043063649239e-05, "loss": 1.6043, "step": 210 }, { "epoch": 0.41, "learning_rate": 4.507883192725089e-05, "loss": 1.5839, "step": 220 }, { "epoch": 0.43, "learning_rate": 4.463630570133978e-05, "loss": 1.5923, "step": 230 }, { "epoch": 0.45, "learning_rate": 4.417710349906658e-05, "loss": 1.5841, "step": 240 }, { "epoch": 0.46, "learning_rate": 4.3701615294247465e-05, "loss": 1.5977, "step": 250 }, { "epoch": 0.48, "learning_rate": 4.321024489145673e-05, "loss": 1.5954, "step": 260 }, { "epoch": 0.5, "learning_rate": 4.270340958309888e-05, "loss": 1.5914, "step": 270 }, { "epoch": 0.52, "learning_rate": 4.2181539795026435e-05, "loss": 1.577, "step": 280 }, { "epoch": 0.54, "learning_rate": 4.1645078721004174e-05, "loss": 1.576, "step": 290 }, { "epoch": 0.56, "learning_rate": 4.109448194633033e-05, "loss": 1.5771, "step": 300 }, { "epoch": 0.57, "learning_rate": 4.0530217060934466e-05, "loss": 1.5825, "step": 310 }, { "epoch": 0.59, "learning_rate": 3.9952763262280405e-05, "loss": 1.581, "step": 320 }, { "epoch": 0.61, "learning_rate": 3.9362610948411585e-05, "loss": 1.5691, "step": 330 }, { "epoch": 0.63, "learning_rate": 3.8760261301484466e-05, "loss": 1.5795, "step": 340 }, { "epoch": 0.65, "learning_rate": 3.8208140480771856e-05, "loss": 1.5847, "step": 350 }, { "epoch": 0.67, "learning_rate": 3.758403340686345e-05, "loss": 1.5737, "step": 360 }, { "epoch": 0.69, "learning_rate": 3.6949239442720976e-05, "loss": 1.5645, "step": 370 }, { "epoch": 0.7, "learning_rate": 3.6304297682067144e-05, "loss": 1.5659, "step": 380 }, { "epoch": 0.72, "learning_rate": 3.5649755836560106e-05, "loss": 1.5613, "step": 390 }, { "epoch": 0.74, "learning_rate": 3.4986169770653685e-05, "loss": 1.5725, "step": 400 }, { "epoch": 0.76, "learning_rate": 3.431410302953389e-05, "loss": 1.5619, "step": 410 }, { "epoch": 0.78, "learning_rate": 3.363412636053269e-05, "loss": 1.5615, "step": 420 }, { "epoch": 0.8, "learning_rate": 3.294681722842537e-05, "loss": 1.5623, "step": 430 }, { "epoch": 0.82, "learning_rate": 3.225275932502315e-05, "loss": 1.5685, "step": 440 }, { "epoch": 0.83, "learning_rate": 3.1552542073477555e-05, "loss": 1.566, "step": 450 }, { "epoch": 0.85, "learning_rate": 3.084676012771753e-05, "loss": 1.5643, "step": 460 }, { "epoch": 0.87, "learning_rate": 3.0136012867444297e-05, "loss": 1.5588, "step": 470 }, { "epoch": 0.89, "learning_rate": 2.942090388911291e-05, "loss": 1.5598, "step": 480 }, { "epoch": 0.91, "learning_rate": 2.8702040493332778e-05, "loss": 1.5618, "step": 490 }, { "epoch": 0.93, "learning_rate": 2.7980033169122454e-05, "loss": 1.5602, "step": 500 }, { "epoch": 0.95, "learning_rate": 2.7255495075456693e-05, "loss": 1.5453, "step": 510 }, { "epoch": 0.96, "learning_rate": 2.652904152054607e-05, "loss": 1.5557, "step": 520 }, { "epoch": 0.98, "learning_rate": 2.5801289439291388e-05, "loss": 1.5493, "step": 530 }, { "epoch": 1.0, "learning_rate": 2.5072856869356593e-05, "loss": 1.5527, "step": 540 }, { "epoch": 1.02, "learning_rate": 2.4344362426305255e-05, "loss": 1.5564, "step": 550 }, { "epoch": 1.04, "learning_rate": 2.3616424778246173e-05, "loss": 1.5514, "step": 560 }, { "epoch": 1.06, "learning_rate": 2.2889662120434453e-05, "loss": 1.5427, "step": 570 }, { "epoch": 1.08, "learning_rate": 2.216469165027406e-05, "loss": 1.5428, "step": 580 }, { "epoch": 1.09, "learning_rate": 2.1442129043167874e-05, "loss": 1.5441, "step": 590 }, { "epoch": 1.11, "learning_rate": 2.0722587929660227e-05, "loss": 1.552, "step": 600 }, { "epoch": 1.13, "learning_rate": 2.0006679374316062e-05, "loss": 1.5555, "step": 610 }, { "epoch": 1.15, "learning_rate": 1.9295011356779192e-05, "loss": 1.5504, "step": 620 }, { "epoch": 1.17, "learning_rate": 1.8588188255450466e-05, "loss": 1.5452, "step": 630 }, { "epoch": 1.19, "learning_rate": 1.7886810334224192e-05, "loss": 1.5502, "step": 640 }, { "epoch": 1.21, "learning_rate": 1.7191473232718774e-05, "loss": 1.556, "step": 650 }, { "epoch": 1.22, "learning_rate": 1.6502767460434588e-05, "loss": 1.5419, "step": 660 }, { "epoch": 1.24, "learning_rate": 1.582127789526838e-05, "loss": 1.5528, "step": 670 }, { "epoch": 1.26, "learning_rate": 1.5147583286810485e-05, "loss": 1.545, "step": 680 }, { "epoch": 1.28, "learning_rate": 1.4482255764846225e-05, "loss": 1.5433, "step": 690 }, { "epoch": 1.3, "learning_rate": 1.3825860353479336e-05, "loss": 1.551, "step": 700 }, { "epoch": 1.32, "learning_rate": 1.3178954491289692e-05, "loss": 1.5424, "step": 710 }, { "epoch": 1.34, "learning_rate": 1.2542087557933041e-05, "loss": 1.5426, "step": 720 }, { "epoch": 1.35, "learning_rate": 1.1915800407584704e-05, "loss": 1.5555, "step": 730 }, { "epoch": 1.37, "learning_rate": 1.1300624909623463e-05, "loss": 1.5335, "step": 740 }, { "epoch": 1.39, "learning_rate": 1.0697083496945765e-05, "loss": 1.5446, "step": 750 }, { "epoch": 1.41, "learning_rate": 1.0105688722293643e-05, "loss": 1.5422, "step": 760 }, { "epoch": 1.43, "learning_rate": 9.526942822973522e-06, "loss": 1.548, "step": 770 }, { "epoch": 1.45, "learning_rate": 8.961337294335021e-06, "loss": 1.5436, "step": 780 }, { "epoch": 1.46, "learning_rate": 8.409352472372595e-06, "loss": 1.5483, "step": 790 }, { "epoch": 1.48, "learning_rate": 7.871457125803896e-06, "loss": 1.5348, "step": 800 }, { "epoch": 1.5, "learning_rate": 7.348108057971728e-06, "loss": 1.5309, "step": 810 }, { "epoch": 1.52, "learning_rate": 6.839749718907429e-06, "loss": 1.5343, "step": 820 }, { "epoch": 1.54, "learning_rate": 6.3468138278852174e-06, "loss": 1.5446, "step": 830 }, { "epoch": 1.56, "learning_rate": 5.8697190067880325e-06, "loss": 1.5381, "step": 840 }, { "epoch": 1.58, "learning_rate": 5.408870424596238e-06, "loss": 1.5483, "step": 850 }, { "epoch": 1.59, "learning_rate": 4.9646594533010875e-06, "loss": 1.5466, "step": 860 }, { "epoch": 1.61, "learning_rate": 4.537463335535161e-06, "loss": 1.5397, "step": 870 }, { "epoch": 1.63, "learning_rate": 4.127644864202104e-06, "loss": 1.5355, "step": 880 }, { "epoch": 1.65, "learning_rate": 3.735552074377563e-06, "loss": 1.5409, "step": 890 }, { "epoch": 1.67, "learning_rate": 3.3615179477432645e-06, "loss": 1.5383, "step": 900 }, { "epoch": 1.69, "learning_rate": 3.0058601298048774e-06, "loss": 1.5338, "step": 910 }, { "epoch": 1.71, "learning_rate": 2.6688806601341765e-06, "loss": 1.5384, "step": 920 }, { "epoch": 1.72, "learning_rate": 2.350865715864278e-06, "loss": 1.5305, "step": 930 }, { "epoch": 1.74, "learning_rate": 2.0520853686560178e-06, "loss": 1.5344, "step": 940 }, { "epoch": 1.76, "learning_rate": 1.772793355341734e-06, "loss": 1.5363, "step": 950 }, { "epoch": 1.78, "learning_rate": 1.513226862441286e-06, "loss": 1.5468, "step": 960 }, { "epoch": 1.8, "learning_rate": 1.273606324733284e-06, "loss": 1.5369, "step": 970 }, { "epoch": 1.82, "learning_rate": 1.0541352380526087e-06, "loss": 1.5428, "step": 980 }, { "epoch": 1.84, "learning_rate": 8.549999864732011e-07, "loss": 1.5338, "step": 990 }, { "epoch": 1.85, "learning_rate": 6.763696840228456e-07, "loss": 1.5399, "step": 1000 }, { "epoch": 1.87, "learning_rate": 5.183960310644748e-07, "loss": 1.5406, "step": 1010 }, { "epoch": 1.89, "learning_rate": 3.812131854657813e-07, "loss": 1.5381, "step": 1020 }, { "epoch": 1.91, "learning_rate": 2.649376486667743e-07, "loss": 1.5354, "step": 1030 }, { "epoch": 1.93, "learning_rate": 1.6966816674182373e-07, "loss": 1.5358, "step": 1040 }, { "epoch": 1.95, "learning_rate": 1.019530491929821e-07, "loss": 1.5481, "step": 1050 }, { "epoch": 1.97, "learning_rate": 4.680317016582669e-08, "loss": 1.5504, "step": 1060 }, { "epoch": 1.98, "learning_rate": 1.284459498280266e-08, "loss": 1.5429, "step": 1070 }, { "epoch": 2.0, "step": 1078, "total_flos": 2.684311584557236e+18, "train_loss": 1.5770846359804964, "train_runtime": 13419.9836, "train_samples_per_second": 10.287, "train_steps_per_second": 0.08 } ], "max_steps": 1078, "num_train_epochs": 2, "total_flos": 2.684311584557236e+18, "trial_name": null, "trial_params": null }