|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 6717, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.94789081885856e-08, |
|
"loss": 4.9627, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.4888337468982628e-07, |
|
"loss": 4.7244, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.28287841191067e-07, |
|
"loss": 4.2955, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.076923076923077e-07, |
|
"loss": 3.4769, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.8709677419354837e-07, |
|
"loss": 2.602, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.665012406947891e-07, |
|
"loss": 1.7352, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.459057071960298e-07, |
|
"loss": 1.5972, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.253101736972705e-07, |
|
"loss": 1.4621, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.047146401985111e-07, |
|
"loss": 1.2769, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.841191066997518e-07, |
|
"loss": 0.9435, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.635235732009926e-07, |
|
"loss": 0.4583, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.429280397022333e-07, |
|
"loss": 0.4589, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.985745961355717e-07, |
|
"loss": 0.456, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.935064935064936e-07, |
|
"loss": 0.3103, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.884383908774152e-07, |
|
"loss": 0.4253, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.83370288248337e-07, |
|
"loss": 0.363, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.783021856192588e-07, |
|
"loss": 0.2691, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.732340829901805e-07, |
|
"loss": 0.2752, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.681659803611024e-07, |
|
"loss": 0.292, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.63097877732024e-07, |
|
"loss": 0.3344, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.580297751029457e-07, |
|
"loss": 0.3355, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.529616724738676e-07, |
|
"loss": 0.3746, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.478935698447893e-07, |
|
"loss": 0.3536, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.428254672157111e-07, |
|
"loss": 0.2955, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.377573645866328e-07, |
|
"loss": 0.3463, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.326892619575546e-07, |
|
"loss": 0.4049, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.276211593284764e-07, |
|
"loss": 0.3387, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.225530566993981e-07, |
|
"loss": 0.3181, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.174849540703199e-07, |
|
"loss": 0.2915, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.125752296484004e-07, |
|
"loss": 0.298, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.075071270193221e-07, |
|
"loss": 0.3243, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.024390243902439e-07, |
|
"loss": 0.2464, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.973709217611656e-07, |
|
"loss": 0.3151, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.923028191320873e-07, |
|
"loss": 0.3568, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.872347165030091e-07, |
|
"loss": 0.2174, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.821666138739309e-07, |
|
"loss": 0.3157, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.770985112448527e-07, |
|
"loss": 0.3387, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.720304086157745e-07, |
|
"loss": 0.3334, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.669623059866962e-07, |
|
"loss": 0.2684, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.618942033576179e-07, |
|
"loss": 0.2936, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.568261007285396e-07, |
|
"loss": 0.3284, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.517579980994615e-07, |
|
"loss": 0.1762, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.466898954703833e-07, |
|
"loss": 0.3034, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.41621792841305e-07, |
|
"loss": 0.3374, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.365536902122268e-07, |
|
"loss": 0.2804, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.314855875831485e-07, |
|
"loss": 0.2602, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.264174849540702e-07, |
|
"loss": 0.3025, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.213493823249921e-07, |
|
"loss": 0.3233, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.162812796959138e-07, |
|
"loss": 0.3104, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.112131770668356e-07, |
|
"loss": 0.3905, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.061450744377573e-07, |
|
"loss": 0.2828, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.01076971808679e-07, |
|
"loss": 0.2781, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.960088691796008e-07, |
|
"loss": 0.2269, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.909407665505227e-07, |
|
"loss": 0.2633, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.858726639214444e-07, |
|
"loss": 0.3526, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.808045612923662e-07, |
|
"loss": 0.2754, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.757364586632878e-07, |
|
"loss": 0.268, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.706683560342096e-07, |
|
"loss": 0.3112, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.656002534051315e-07, |
|
"loss": 0.2976, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.605321507760532e-07, |
|
"loss": 0.2428, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.55464048146975e-07, |
|
"loss": 0.2446, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.503959455178968e-07, |
|
"loss": 0.2443, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.453278428888184e-07, |
|
"loss": 0.2407, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.402597402597402e-07, |
|
"loss": 0.2782, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.351916376306619e-07, |
|
"loss": 0.5238, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.301235350015838e-07, |
|
"loss": 0.2876, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.250554323725056e-07, |
|
"loss": 0.3338, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.199873297434272e-07, |
|
"loss": 0.2832, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.14919227114349e-07, |
|
"loss": 0.3335, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.098511244852708e-07, |
|
"loss": 0.2526, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.047830218561926e-07, |
|
"loss": 0.2588, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.99873297434273e-07, |
|
"loss": 0.3761, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.948051948051947e-07, |
|
"loss": 0.2686, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.897370921761165e-07, |
|
"loss": 0.2223, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.846689895470384e-07, |
|
"loss": 0.3035, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.796008869179601e-07, |
|
"loss": 0.3097, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.745327842888818e-07, |
|
"loss": 0.2795, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.694646816598036e-07, |
|
"loss": 0.2274, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.643965790307253e-07, |
|
"loss": 0.2918, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.593284764016471e-07, |
|
"loss": 0.2982, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.542603737725689e-07, |
|
"loss": 0.2517, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.491922711434906e-07, |
|
"loss": 0.2184, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.441241685144124e-07, |
|
"loss": 0.2715, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.390560658853341e-07, |
|
"loss": 0.2155, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.339879632562559e-07, |
|
"loss": 0.2686, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.289198606271777e-07, |
|
"loss": 0.2818, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.238517579980995e-07, |
|
"loss": 0.2865, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.187836553690212e-07, |
|
"loss": 0.2123, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.137155527399429e-07, |
|
"loss": 0.2663, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.086474501108647e-07, |
|
"loss": 0.2821, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.035793474817865e-07, |
|
"loss": 0.3285, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.985112448527082e-07, |
|
"loss": 0.2123, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.934431422236301e-07, |
|
"loss": 0.2753, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.883750395945518e-07, |
|
"loss": 0.2082, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.833069369654735e-07, |
|
"loss": 0.2989, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.782388343363953e-07, |
|
"loss": 0.2268, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.73170731707317e-07, |
|
"loss": 0.2775, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.681026290782388e-07, |
|
"loss": 0.2643, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.630345264491606e-07, |
|
"loss": 0.1845, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.579664238200823e-07, |
|
"loss": 0.282, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.528983211910041e-07, |
|
"loss": 0.3109, |
|
"step": 3232 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.478302185619259e-07, |
|
"loss": 0.2203, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.427621159328476e-07, |
|
"loss": 0.2803, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.376940133037694e-07, |
|
"loss": 0.298, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.326259106746911e-07, |
|
"loss": 0.2953, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.275578080456129e-07, |
|
"loss": 0.2844, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.224897054165347e-07, |
|
"loss": 0.2202, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.174216027874564e-07, |
|
"loss": 0.2211, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.123535001583782e-07, |
|
"loss": 0.3406, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.072853975292999e-07, |
|
"loss": 0.2894, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.022172949002217e-07, |
|
"loss": 0.2595, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.971491922711435e-07, |
|
"loss": 0.2943, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.920810896420652e-07, |
|
"loss": 0.2118, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.87012987012987e-07, |
|
"loss": 0.2267, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.819448843839088e-07, |
|
"loss": 0.2574, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.768767817548305e-07, |
|
"loss": 0.2322, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.7180867912575227e-07, |
|
"loss": 0.2427, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.6674057649667405e-07, |
|
"loss": 0.252, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.616724738675958e-07, |
|
"loss": 0.2369, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5660437123851757e-07, |
|
"loss": 0.2179, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.5153626860943935e-07, |
|
"loss": 0.2108, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.464681659803611e-07, |
|
"loss": 0.1787, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.414000633512828e-07, |
|
"loss": 0.281, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.3633196072220465e-07, |
|
"loss": 0.3145, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.312638580931264e-07, |
|
"loss": 0.2586, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.261957554640481e-07, |
|
"loss": 0.2548, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.211276528349699e-07, |
|
"loss": 0.3165, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.160595502058917e-07, |
|
"loss": 0.1822, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.109914475768134e-07, |
|
"loss": 0.1949, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.059233449477352e-07, |
|
"loss": 0.3197, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.008552423186569e-07, |
|
"loss": 0.2209, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.957871396895787e-07, |
|
"loss": 0.1941, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.907190370605005e-07, |
|
"loss": 0.2412, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.856509344314222e-07, |
|
"loss": 0.3301, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.8058283180234395e-07, |
|
"loss": 0.3845, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.755147291732658e-07, |
|
"loss": 0.2557, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.704466265441875e-07, |
|
"loss": 0.2228, |
|
"step": 4384 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.6537852391510925e-07, |
|
"loss": 0.2608, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.6031042128603103e-07, |
|
"loss": 0.1698, |
|
"step": 4448 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.552423186569528e-07, |
|
"loss": 0.2359, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.5017421602787454e-07, |
|
"loss": 0.2657, |
|
"step": 4512 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.451061133987963e-07, |
|
"loss": 0.2521, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.4003801076971806e-07, |
|
"loss": 0.216, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.3496990814063984e-07, |
|
"loss": 0.3104, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.299018055115616e-07, |
|
"loss": 0.2136, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.2483370288248335e-07, |
|
"loss": 0.2443, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.197656002534051e-07, |
|
"loss": 0.1771, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.146974976243269e-07, |
|
"loss": 0.187, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.0962939499524865e-07, |
|
"loss": 0.2901, |
|
"step": 4768 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.045612923661704e-07, |
|
"loss": 0.4018, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9949318973709216e-07, |
|
"loss": 0.2795, |
|
"step": 4832 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9442508710801395e-07, |
|
"loss": 0.2578, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.893569844789357e-07, |
|
"loss": 0.1746, |
|
"step": 4896 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.8428888184985746e-07, |
|
"loss": 0.2534, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.792207792207792e-07, |
|
"loss": 0.3677, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.74152676591701e-07, |
|
"loss": 0.2953, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.6908457396262276e-07, |
|
"loss": 0.2722, |
|
"step": 5024 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.640164713335445e-07, |
|
"loss": 0.2797, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.589483687044662e-07, |
|
"loss": 0.2707, |
|
"step": 5088 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.5388026607538806e-07, |
|
"loss": 0.3243, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.488121634463098e-07, |
|
"loss": 0.271, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.437440608172315e-07, |
|
"loss": 0.2899, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.386759581881533e-07, |
|
"loss": 0.3117, |
|
"step": 5216 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.3360785555907506e-07, |
|
"loss": 0.2323, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2853975292999684e-07, |
|
"loss": 0.2318, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2347165030091857e-07, |
|
"loss": 0.29, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.1840354767184035e-07, |
|
"loss": 0.2406, |
|
"step": 5344 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.133354450427621e-07, |
|
"loss": 0.2292, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.0826734241368387e-07, |
|
"loss": 0.3598, |
|
"step": 5408 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.0319923978460563e-07, |
|
"loss": 0.258, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.981311371555274e-07, |
|
"loss": 0.2904, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.9306303452644914e-07, |
|
"loss": 0.3177, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8799493189737092e-07, |
|
"loss": 0.3294, |
|
"step": 5536 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.8292682926829268e-07, |
|
"loss": 0.2415, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7785872663921444e-07, |
|
"loss": 0.2808, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.727906240101362e-07, |
|
"loss": 0.2563, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6772252138105798e-07, |
|
"loss": 0.2069, |
|
"step": 5664 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.626544187519797e-07, |
|
"loss": 0.2112, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.575863161229015e-07, |
|
"loss": 0.1768, |
|
"step": 5728 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.5251821349382325e-07, |
|
"loss": 0.2977, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.47450110864745e-07, |
|
"loss": 0.2511, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4238200823566676e-07, |
|
"loss": 0.2203, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.3731390560658854e-07, |
|
"loss": 0.1882, |
|
"step": 5856 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.3224580297751027e-07, |
|
"loss": 0.3103, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2717770034843206e-07, |
|
"loss": 0.2475, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.2210959771935381e-07, |
|
"loss": 0.2512, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1704149509027557e-07, |
|
"loss": 0.2936, |
|
"step": 5984 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.1197339246119733e-07, |
|
"loss": 0.2215, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0690528983211909e-07, |
|
"loss": 0.2263, |
|
"step": 6048 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0183718720304086e-07, |
|
"loss": 0.2768, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.676908457396261e-08, |
|
"loss": 0.2535, |
|
"step": 6112 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.170098194488437e-08, |
|
"loss": 0.195, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.663287931580614e-08, |
|
"loss": 0.1955, |
|
"step": 6176 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.15647766867279e-08, |
|
"loss": 0.355, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.649667405764967e-08, |
|
"loss": 0.2593, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.142857142857142e-08, |
|
"loss": 0.3068, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.636046879949318e-08, |
|
"loss": 0.2366, |
|
"step": 6304 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.129236617041495e-08, |
|
"loss": 0.2361, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.6224263541336714e-08, |
|
"loss": 0.2616, |
|
"step": 6368 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.115616091225847e-08, |
|
"loss": 0.2597, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.6088058283180234e-08, |
|
"loss": 0.3039, |
|
"step": 6432 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.1019955654102e-08, |
|
"loss": 0.2453, |
|
"step": 6464 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.595185302502376e-08, |
|
"loss": 0.3413, |
|
"step": 6496 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.088375039594552e-08, |
|
"loss": 0.2654, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.581564776686728e-08, |
|
"loss": 0.2189, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.074754513778904e-08, |
|
"loss": 0.257, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.56794425087108e-08, |
|
"loss": 0.25, |
|
"step": 6624 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0611339879632562e-08, |
|
"loss": 0.3328, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.543237250554324e-09, |
|
"loss": 0.2794, |
|
"step": 6688 |
|
} |
|
], |
|
"logging_steps": 32, |
|
"max_steps": 6717, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 6717, |
|
"total_flos": 2.85226794934272e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|