{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1425801300330786,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 8.999954848325688e-06,
      "loss": 0.7277,
      "step": 10
    },
    {
      "epoch": 0.0,
      "learning_rate": 8.999819394208827e-06,
      "loss": 0.6455,
      "step": 20
    },
    {
      "epoch": 0.0,
      "learning_rate": 8.999593640367633e-06,
      "loss": 0.5428,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.9992775913324e-06,
      "loss": 0.3486,
      "step": 40
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.998871253445414e-06,
      "loss": 0.3524,
      "step": 50
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.998374634860822e-06,
      "loss": 0.2701,
      "step": 60
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.997787745544478e-06,
      "loss": 0.2106,
      "step": 70
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.997110597273727e-06,
      "loss": 0.2596,
      "step": 80
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.996343203637181e-06,
      "loss": 0.2015,
      "step": 90
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.995485580034448e-06,
      "loss": 0.2462,
      "step": 100
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.99453774367581e-06,
      "loss": 0.2172,
      "step": 110
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.993499713581887e-06,
      "loss": 0.2655,
      "step": 120
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.992371510583257e-06,
      "loss": 0.1823,
      "step": 130
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.991153157320033e-06,
      "loss": 0.227,
      "step": 140
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.989844678241412e-06,
      "loss": 0.2243,
      "step": 150
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.988446099605179e-06,
      "loss": 0.1787,
      "step": 160
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.986957449477188e-06,
      "loss": 0.2211,
      "step": 170
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.98537875773079e-06,
      "loss": 0.1683,
      "step": 180
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.983710056046243e-06,
      "loss": 0.1879,
      "step": 190
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.98195137791007e-06,
      "loss": 0.1809,
      "step": 200
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.980102758614384e-06,
      "loss": 0.2204,
      "step": 210
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.978164235256191e-06,
      "loss": 0.189,
      "step": 220
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.976135846736634e-06,
      "loss": 0.1389,
      "step": 230
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.97401763376022e-06,
      "loss": 0.1231,
      "step": 240
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.971809638833998e-06,
      "loss": 0.2018,
      "step": 250
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.96951190626671e-06,
      "loss": 0.1271,
      "step": 260
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.967124482167896e-06,
      "loss": 0.1364,
      "step": 270
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.964647414446981e-06,
      "loss": 0.1658,
      "step": 280
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.9620807528123e-06,
      "loss": 0.1479,
      "step": 290
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.959424548770104e-06,
      "loss": 0.1353,
      "step": 300
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.95667885562353e-06,
      "loss": 0.1761,
      "step": 310
    },
    {
      "epoch": 0.05,
      "learning_rate": 8.953843728471533e-06,
      "loss": 0.1391,
      "step": 320
    },
    {
      "epoch": 0.05,
      "learning_rate": 8.950919224207774e-06,
      "loss": 0.1776,
      "step": 330
    },
    {
      "epoch": 0.05,
      "learning_rate": 8.94790540151948e-06,
      "loss": 0.2089,
      "step": 340
    },
    {
      "epoch": 0.05,
      "learning_rate": 8.94480232088627e-06,
      "loss": 0.1368,
      "step": 350
    },
    {
      "epoch": 0.05,
      "learning_rate": 8.941610044578937e-06,
      "loss": 0.1501,
      "step": 360
    },
    {
      "epoch": 0.05,
      "learning_rate": 8.938328636658202e-06,
      "loss": 0.1807,
      "step": 370
    },
    {
      "epoch": 0.05,
      "learning_rate": 8.934958162973425e-06,
      "loss": 0.13,
      "step": 380
    },
    {
      "epoch": 0.06,
      "learning_rate": 8.931498691161287e-06,
      "loss": 0.1437,
      "step": 390
    },
    {
      "epoch": 0.06,
      "learning_rate": 8.92795029064443e-06,
      "loss": 0.0828,
      "step": 400
    },
    {
      "epoch": 0.06,
      "learning_rate": 8.924313032630063e-06,
      "loss": 0.1041,
      "step": 410
    },
    {
      "epoch": 0.06,
      "learning_rate": 8.920586990108539e-06,
      "loss": 0.2663,
      "step": 420
    },
    {
      "epoch": 0.06,
      "learning_rate": 8.916772237851882e-06,
      "loss": 0.2055,
      "step": 430
    },
    {
      "epoch": 0.06,
      "learning_rate": 8.912868852412294e-06,
      "loss": 0.151,
      "step": 440
    },
    {
      "epoch": 0.06,
      "learning_rate": 8.908876912120614e-06,
      "loss": 0.1638,
      "step": 450
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.904796497084747e-06,
      "loss": 0.1399,
      "step": 460
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.900627689188059e-06,
      "loss": 0.1615,
      "step": 470
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.89637057208773e-06,
      "loss": 0.1209,
      "step": 480
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.892025231213077e-06,
      "loss": 0.1605,
      "step": 490
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.887591753763842e-06,
      "loss": 0.124,
      "step": 500
    },
    {
      "epoch": 0.07,
      "eval_accuracy": 0.0,
      "eval_loss": 0.13451002538204193,
      "eval_runtime": 446.6893,
      "eval_samples_per_second": 52.338,
      "eval_steps_per_second": 8.724,
      "step": 500
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.883070228708436e-06,
      "loss": 0.1045,
      "step": 510
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.878460746782163e-06,
      "loss": 0.2208,
      "step": 520
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.873763400485387e-06,
      "loss": 0.1483,
      "step": 530
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.868978284081688e-06,
      "loss": 0.1322,
      "step": 540
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.864105493595962e-06,
      "loss": 0.1227,
      "step": 550
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.859145126812497e-06,
      "loss": 0.1371,
      "step": 560
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.854097283273012e-06,
      "loss": 0.1909,
      "step": 570
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.848962064274655e-06,
      "loss": 0.0993,
      "step": 580
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.843739572867977e-06,
      "loss": 0.1053,
      "step": 590
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.838429913854859e-06,
      "loss": 0.1401,
      "step": 600
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.83303319378641e-06,
      "loss": 0.09,
      "step": 610
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.827549520960827e-06,
      "loss": 0.1667,
      "step": 620
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.821979005421223e-06,
      "loss": 0.1069,
      "step": 630
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.816321758953426e-06,
      "loss": 0.1659,
      "step": 640
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.810577895083726e-06,
      "loss": 0.1309,
      "step": 650
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.804747529076592e-06,
      "loss": 0.16,
      "step": 660
    },
    {
      "epoch": 0.1,
      "learning_rate": 8.79883077793238e-06,
      "loss": 0.1041,
      "step": 670
    },
    {
      "epoch": 0.1,
      "learning_rate": 8.792827760384965e-06,
      "loss": 0.1344,
      "step": 680
    },
    {
      "epoch": 0.1,
      "learning_rate": 8.786738596899362e-06,
      "loss": 0.0922,
      "step": 690
    },
    {
      "epoch": 0.1,
      "learning_rate": 8.780563409669322e-06,
      "loss": 0.0813,
      "step": 700
    },
    {
      "epoch": 0.1,
      "learning_rate": 8.774302322614858e-06,
      "loss": 0.1795,
      "step": 710
    },
    {
      "epoch": 0.1,
      "learning_rate": 8.767955461379783e-06,
      "loss": 0.1409,
      "step": 720
    },
    {
      "epoch": 0.1,
      "learning_rate": 8.761522953329163e-06,
      "loss": 0.1362,
      "step": 730
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.75500492754678e-06,
      "loss": 0.1227,
      "step": 740
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.74840151483254e-06,
      "loss": 0.0643,
      "step": 750
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.741712847699832e-06,
      "loss": 0.1895,
      "step": 760
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.734939060372892e-06,
      "loss": 0.0959,
      "step": 770
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.72808028878409e-06,
      "loss": 0.1089,
      "step": 780
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.721136670571216e-06,
      "loss": 0.0959,
      "step": 790
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.714108345074704e-06,
      "loss": 0.1264,
      "step": 800
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.706995453334855e-06,
      "loss": 0.1078,
      "step": 810
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.699798138088988e-06,
      "loss": 0.1278,
      "step": 820
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.692516543768581e-06,
      "loss": 0.1337,
      "step": 830
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.685150816496384e-06,
      "loss": 0.0815,
      "step": 840
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.677701104083467e-06,
      "loss": 0.1033,
      "step": 850
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.670167556026274e-06,
      "loss": 0.1412,
      "step": 860
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.662550323503606e-06,
      "loss": 0.197,
      "step": 870
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.654849559373595e-06,
      "loss": 0.1118,
      "step": 880
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.647065418170644e-06,
      "loss": 0.1132,
      "step": 890
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.639198056102309e-06,
      "loss": 0.0928,
      "step": 900
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.631247631046176e-06,
      "loss": 0.0723,
      "step": 910
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.623214302546691e-06,
      "loss": 0.126,
      "step": 920
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.615098231811958e-06,
      "loss": 0.1363,
      "step": 930
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.606899581710505e-06,
      "loss": 0.1243,
      "step": 940
    },
    {
      "epoch": 0.14,
      "learning_rate": 8.598618516768005e-06,
      "loss": 0.0382,
      "step": 950
    },
    {
      "epoch": 0.14,
      "learning_rate": 8.590255203163995e-06,
      "loss": 0.1295,
      "step": 960
    },
    {
      "epoch": 0.14,
      "learning_rate": 8.581809808728524e-06,
      "loss": 0.1145,
      "step": 970
    },
    {
      "epoch": 0.14,
      "learning_rate": 8.573282502938791e-06,
      "loss": 0.0915,
      "step": 980
    },
    {
      "epoch": 0.14,
      "learning_rate": 8.564673456915746e-06,
      "loss": 0.1918,
      "step": 990
    },
    {
      "epoch": 0.14,
      "learning_rate": 8.55598284342065e-06,
      "loss": 0.1732,
      "step": 1000
    },
    {
      "epoch": 0.14,
      "eval_accuracy": 0.0,
      "eval_loss": 0.12965914607048035,
      "eval_runtime": 448.5256,
      "eval_samples_per_second": 52.124,
      "eval_steps_per_second": 8.688,
      "step": 1000
    }
  ],
  "logging_steps": 10,
  "max_steps": 7013,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}