|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.8544274455261938, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.998938447446803e-05, |
|
"loss": 2.0078, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9957546913022665e-05, |
|
"loss": 1.8414, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9904514353459654e-05, |
|
"loss": 1.7763, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.983033183325818e-05, |
|
"loss": 1.7278, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.973506235133323e-05, |
|
"loss": 1.7139, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.96313569658781e-05, |
|
"loss": 1.6947, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.94962599008322e-05, |
|
"loss": 1.6749, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9340359581993066e-05, |
|
"loss": 1.6579, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.916378840646592e-05, |
|
"loss": 1.6499, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8966696325916515e-05, |
|
"loss": 1.6507, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8749250719225915e-05, |
|
"loss": 1.6426, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.8511636250345294e-05, |
|
"loss": 1.6288, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.825405471147153e-05, |
|
"loss": 1.6265, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.797672485167683e-05, |
|
"loss": 1.6252, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.7679882191137804e-05, |
|
"loss": 1.6218, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.7363778821121784e-05, |
|
"loss": 1.6146, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.702868318990039e-05, |
|
"loss": 1.604, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6674879874771926e-05, |
|
"loss": 1.607, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.630266934038642e-05, |
|
"loss": 1.5972, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.591236768357833e-05, |
|
"loss": 1.5973, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.55043063649239e-05, |
|
"loss": 1.6043, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.507883192725089e-05, |
|
"loss": 1.5839, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.463630570133978e-05, |
|
"loss": 1.5923, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.417710349906658e-05, |
|
"loss": 1.5841, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.3701615294247465e-05, |
|
"loss": 1.5977, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.321024489145673e-05, |
|
"loss": 1.5954, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.270340958309888e-05, |
|
"loss": 1.5914, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.2181539795026435e-05, |
|
"loss": 1.577, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.1645078721004174e-05, |
|
"loss": 1.576, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.109448194633033e-05, |
|
"loss": 1.5771, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0530217060934466e-05, |
|
"loss": 1.5825, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.9952763262280405e-05, |
|
"loss": 1.581, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9362610948411585e-05, |
|
"loss": 1.5691, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.8760261301484466e-05, |
|
"loss": 1.5795, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.8208140480771856e-05, |
|
"loss": 1.5847, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.758403340686345e-05, |
|
"loss": 1.5737, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.6949239442720976e-05, |
|
"loss": 1.5645, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.6304297682067144e-05, |
|
"loss": 1.5659, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.5649755836560106e-05, |
|
"loss": 1.5613, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.4986169770653685e-05, |
|
"loss": 1.5725, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.431410302953389e-05, |
|
"loss": 1.5619, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.363412636053269e-05, |
|
"loss": 1.5615, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.294681722842537e-05, |
|
"loss": 1.5623, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.225275932502315e-05, |
|
"loss": 1.5685, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.1552542073477555e-05, |
|
"loss": 1.566, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.084676012771753e-05, |
|
"loss": 1.5643, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.0136012867444297e-05, |
|
"loss": 1.5588, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.942090388911291e-05, |
|
"loss": 1.5598, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8702040493332778e-05, |
|
"loss": 1.5618, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7980033169122454e-05, |
|
"loss": 1.5602, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.7255495075456693e-05, |
|
"loss": 1.5453, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.652904152054607e-05, |
|
"loss": 1.5557, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5801289439291388e-05, |
|
"loss": 1.5493, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.5072856869356593e-05, |
|
"loss": 1.5527, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.4344362426305255e-05, |
|
"loss": 1.5564, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.3616424778246173e-05, |
|
"loss": 1.5514, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.2889662120434453e-05, |
|
"loss": 1.5427, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.216469165027406e-05, |
|
"loss": 1.5428, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.1442129043167874e-05, |
|
"loss": 1.5441, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.0722587929660227e-05, |
|
"loss": 1.552, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.0006679374316062e-05, |
|
"loss": 1.5555, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.9295011356779192e-05, |
|
"loss": 1.5504, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.8588188255450466e-05, |
|
"loss": 1.5452, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.7886810334224192e-05, |
|
"loss": 1.5502, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.7191473232718774e-05, |
|
"loss": 1.556, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.6502767460434588e-05, |
|
"loss": 1.5419, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.582127789526838e-05, |
|
"loss": 1.5528, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.5147583286810485e-05, |
|
"loss": 1.545, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.4482255764846225e-05, |
|
"loss": 1.5433, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.3825860353479336e-05, |
|
"loss": 1.551, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.3178954491289692e-05, |
|
"loss": 1.5424, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2542087557933041e-05, |
|
"loss": 1.5426, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.1915800407584704e-05, |
|
"loss": 1.5555, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.1300624909623463e-05, |
|
"loss": 1.5335, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0697083496945765e-05, |
|
"loss": 1.5446, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.0105688722293643e-05, |
|
"loss": 1.5422, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.526942822973522e-06, |
|
"loss": 1.548, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.961337294335021e-06, |
|
"loss": 1.5436, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.409352472372595e-06, |
|
"loss": 1.5483, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.871457125803896e-06, |
|
"loss": 1.5348, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.348108057971728e-06, |
|
"loss": 1.5309, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 6.839749718907429e-06, |
|
"loss": 1.5343, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.3468138278852174e-06, |
|
"loss": 1.5446, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5.8697190067880325e-06, |
|
"loss": 1.5381, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5.408870424596238e-06, |
|
"loss": 1.5483, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.9646594533010875e-06, |
|
"loss": 1.5466, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.537463335535161e-06, |
|
"loss": 1.5397, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.127644864202104e-06, |
|
"loss": 1.5355, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.735552074377563e-06, |
|
"loss": 1.5409, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.3615179477432645e-06, |
|
"loss": 1.5383, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.0058601298048774e-06, |
|
"loss": 1.5338, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.6688806601341765e-06, |
|
"loss": 1.5384, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.350865715864278e-06, |
|
"loss": 1.5305, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0520853686560178e-06, |
|
"loss": 1.5344, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.772793355341734e-06, |
|
"loss": 1.5363, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.513226862441286e-06, |
|
"loss": 1.5468, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.273606324733284e-06, |
|
"loss": 1.5369, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.0541352380526087e-06, |
|
"loss": 1.5428, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 8.549999864732011e-07, |
|
"loss": 1.5338, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.763696840228456e-07, |
|
"loss": 1.5399, |
|
"step": 1000 |
|
} |
|
], |
|
"max_steps": 1078, |
|
"num_train_epochs": 2, |
|
"total_flos": 2.489434826550018e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|