|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.28653295128939826, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0028653295128939827, |
|
"grad_norm": 0.18870845437049866, |
|
"learning_rate": 1e-05, |
|
"loss": 10.3647, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0028653295128939827, |
|
"eval_loss": 10.36025333404541, |
|
"eval_runtime": 1.0201, |
|
"eval_samples_per_second": 288.212, |
|
"eval_steps_per_second": 36.272, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0057306590257879654, |
|
"grad_norm": 0.21544590592384338, |
|
"learning_rate": 2e-05, |
|
"loss": 10.362, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.008595988538681949, |
|
"grad_norm": 0.21012459695339203, |
|
"learning_rate": 3e-05, |
|
"loss": 10.3787, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.011461318051575931, |
|
"grad_norm": 0.17221978306770325, |
|
"learning_rate": 4e-05, |
|
"loss": 10.3589, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.014326647564469915, |
|
"grad_norm": 0.19953270256519318, |
|
"learning_rate": 5e-05, |
|
"loss": 10.3518, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.017191977077363897, |
|
"grad_norm": 0.2154458910226822, |
|
"learning_rate": 6e-05, |
|
"loss": 10.3715, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02005730659025788, |
|
"grad_norm": 0.18448343873023987, |
|
"learning_rate": 7e-05, |
|
"loss": 10.3615, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.022922636103151862, |
|
"grad_norm": 0.17267638444900513, |
|
"learning_rate": 8e-05, |
|
"loss": 10.3744, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.025787965616045846, |
|
"grad_norm": 0.21264782547950745, |
|
"learning_rate": 9e-05, |
|
"loss": 10.3635, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.025787965616045846, |
|
"eval_loss": 10.357918739318848, |
|
"eval_runtime": 1.0313, |
|
"eval_samples_per_second": 285.087, |
|
"eval_steps_per_second": 35.878, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02865329512893983, |
|
"grad_norm": 0.20090581476688385, |
|
"learning_rate": 0.0001, |
|
"loss": 10.3682, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03151862464183381, |
|
"grad_norm": 0.2009212225675583, |
|
"learning_rate": 9.99695413509548e-05, |
|
"loss": 10.3545, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.034383954154727794, |
|
"grad_norm": 0.17764975130558014, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 10.3667, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03724928366762178, |
|
"grad_norm": 0.19756364822387695, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 10.3524, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04011461318051576, |
|
"grad_norm": 0.21011871099472046, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 10.3613, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04297994269340974, |
|
"grad_norm": 0.20613764226436615, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 10.3708, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.045845272206303724, |
|
"grad_norm": 0.27395445108413696, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 10.347, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04871060171919771, |
|
"grad_norm": 0.20352178812026978, |
|
"learning_rate": 9.851478631379982e-05, |
|
"loss": 10.3532, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05157593123209169, |
|
"grad_norm": 0.2129252851009369, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 10.354, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05157593123209169, |
|
"eval_loss": 10.35179328918457, |
|
"eval_runtime": 1.0103, |
|
"eval_samples_per_second": 290.994, |
|
"eval_steps_per_second": 36.622, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.054441260744985676, |
|
"grad_norm": 0.1703406721353531, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 10.3596, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05730659025787966, |
|
"grad_norm": 0.2214614599943161, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 10.3457, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06017191977077364, |
|
"grad_norm": 0.2195059061050415, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 10.3563, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06303724928366762, |
|
"grad_norm": 0.21687962114810944, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 10.3525, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0659025787965616, |
|
"grad_norm": 0.20921839773654938, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 10.3405, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06876790830945559, |
|
"grad_norm": 0.21130795776844025, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 10.3536, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07163323782234957, |
|
"grad_norm": 0.22695602476596832, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 10.3385, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07449856733524356, |
|
"grad_norm": 0.2284359186887741, |
|
"learning_rate": 9.24024048078213e-05, |
|
"loss": 10.3517, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07736389684813753, |
|
"grad_norm": 0.22772260010242462, |
|
"learning_rate": 9.145187862775209e-05, |
|
"loss": 10.3457, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07736389684813753, |
|
"eval_loss": 10.345144271850586, |
|
"eval_runtime": 1.0637, |
|
"eval_samples_per_second": 276.397, |
|
"eval_steps_per_second": 34.785, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08022922636103152, |
|
"grad_norm": 0.24294577538967133, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 10.3317, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0830945558739255, |
|
"grad_norm": 0.2539214491844177, |
|
"learning_rate": 8.940053768033609e-05, |
|
"loss": 10.3381, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08595988538681948, |
|
"grad_norm": 0.20477065443992615, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 10.3582, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08882521489971347, |
|
"grad_norm": 0.21697883307933807, |
|
"learning_rate": 8.715724127386972e-05, |
|
"loss": 10.3367, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.09169054441260745, |
|
"grad_norm": 0.29203030467033386, |
|
"learning_rate": 8.596699001693255e-05, |
|
"loss": 10.3243, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09455587392550144, |
|
"grad_norm": 0.23186412453651428, |
|
"learning_rate": 8.473291852294987e-05, |
|
"loss": 10.3584, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09742120343839542, |
|
"grad_norm": 0.2152336984872818, |
|
"learning_rate": 8.345653031794292e-05, |
|
"loss": 10.3469, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.10028653295128939, |
|
"grad_norm": 0.2645852267742157, |
|
"learning_rate": 8.213938048432697e-05, |
|
"loss": 10.3416, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10315186246418338, |
|
"grad_norm": 0.19434252381324768, |
|
"learning_rate": 8.07830737662829e-05, |
|
"loss": 10.3436, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10315186246418338, |
|
"eval_loss": 10.338038444519043, |
|
"eval_runtime": 1.0423, |
|
"eval_samples_per_second": 282.056, |
|
"eval_steps_per_second": 35.497, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10601719197707736, |
|
"grad_norm": 0.2565288245677948, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 10.3424, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10888252148997135, |
|
"grad_norm": 0.3266773819923401, |
|
"learning_rate": 7.795964517353735e-05, |
|
"loss": 10.3231, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.11174785100286533, |
|
"grad_norm": 0.30517715215682983, |
|
"learning_rate": 7.649596321166024e-05, |
|
"loss": 10.3282, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.11461318051575932, |
|
"grad_norm": 0.213883176445961, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 10.3301, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1174785100286533, |
|
"grad_norm": 0.23180383443832397, |
|
"learning_rate": 7.347357813929454e-05, |
|
"loss": 10.337, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.12034383954154727, |
|
"grad_norm": 0.2547537088394165, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 10.3492, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.12320916905444126, |
|
"grad_norm": 0.3248274326324463, |
|
"learning_rate": 7.033683215379002e-05, |
|
"loss": 10.326, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.12607449856733524, |
|
"grad_norm": 0.28297948837280273, |
|
"learning_rate": 6.873032967079561e-05, |
|
"loss": 10.3352, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.12893982808022922, |
|
"grad_norm": 0.2600502371788025, |
|
"learning_rate": 6.710100716628344e-05, |
|
"loss": 10.3567, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12893982808022922, |
|
"eval_loss": 10.33073902130127, |
|
"eval_runtime": 1.0349, |
|
"eval_samples_per_second": 284.076, |
|
"eval_steps_per_second": 35.751, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1318051575931232, |
|
"grad_norm": 0.2766321301460266, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 10.3338, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1346704871060172, |
|
"grad_norm": 0.2638920843601227, |
|
"learning_rate": 6.378186779084995e-05, |
|
"loss": 10.339, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.13753581661891118, |
|
"grad_norm": 0.26153671741485596, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 10.3352, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.14040114613180515, |
|
"grad_norm": 0.27219700813293457, |
|
"learning_rate": 6.0395584540887963e-05, |
|
"loss": 10.3293, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.14326647564469913, |
|
"grad_norm": 0.2812553942203522, |
|
"learning_rate": 5.868240888334653e-05, |
|
"loss": 10.3356, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14613180515759314, |
|
"grad_norm": 0.26205867528915405, |
|
"learning_rate": 5.695865504800327e-05, |
|
"loss": 10.3289, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.1489971346704871, |
|
"grad_norm": 0.2941390573978424, |
|
"learning_rate": 5.522642316338268e-05, |
|
"loss": 10.3359, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1518624641833811, |
|
"grad_norm": 0.30991968512535095, |
|
"learning_rate": 5.348782368720626e-05, |
|
"loss": 10.3291, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.15472779369627507, |
|
"grad_norm": 0.26404446363449097, |
|
"learning_rate": 5.174497483512506e-05, |
|
"loss": 10.324, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.15472779369627507, |
|
"eval_loss": 10.3239107131958, |
|
"eval_runtime": 1.0645, |
|
"eval_samples_per_second": 276.186, |
|
"eval_steps_per_second": 34.758, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.15759312320916904, |
|
"grad_norm": 0.23177841305732727, |
|
"learning_rate": 5e-05, |
|
"loss": 10.3431, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.16045845272206305, |
|
"grad_norm": 0.2693847715854645, |
|
"learning_rate": 4.825502516487497e-05, |
|
"loss": 10.3345, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.16332378223495703, |
|
"grad_norm": 0.315563827753067, |
|
"learning_rate": 4.6512176312793736e-05, |
|
"loss": 10.3337, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.166189111747851, |
|
"grad_norm": 0.2582322955131531, |
|
"learning_rate": 4.477357683661734e-05, |
|
"loss": 10.3285, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.16905444126074498, |
|
"grad_norm": 0.38031986355781555, |
|
"learning_rate": 4.3041344951996746e-05, |
|
"loss": 10.307, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.17191977077363896, |
|
"grad_norm": 0.33206507563591003, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 10.3213, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17478510028653296, |
|
"grad_norm": 0.2936471402645111, |
|
"learning_rate": 3.960441545911204e-05, |
|
"loss": 10.3271, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.17765042979942694, |
|
"grad_norm": 0.320385217666626, |
|
"learning_rate": 3.790390522001662e-05, |
|
"loss": 10.3251, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.18051575931232092, |
|
"grad_norm": 0.3414050340652466, |
|
"learning_rate": 3.6218132209150045e-05, |
|
"loss": 10.3106, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.18051575931232092, |
|
"eval_loss": 10.318222999572754, |
|
"eval_runtime": 1.0447, |
|
"eval_samples_per_second": 281.419, |
|
"eval_steps_per_second": 35.417, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1833810888252149, |
|
"grad_norm": 0.29449090361595154, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 10.3208, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.18624641833810887, |
|
"grad_norm": 0.318759560585022, |
|
"learning_rate": 3.289899283371657e-05, |
|
"loss": 10.327, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.18911174785100288, |
|
"grad_norm": 0.32590019702911377, |
|
"learning_rate": 3.12696703292044e-05, |
|
"loss": 10.3242, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.19197707736389685, |
|
"grad_norm": 0.3307969570159912, |
|
"learning_rate": 2.9663167846209998e-05, |
|
"loss": 10.3131, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.19484240687679083, |
|
"grad_norm": 0.270192950963974, |
|
"learning_rate": 2.8081442660546125e-05, |
|
"loss": 10.3285, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1977077363896848, |
|
"grad_norm": 0.3431764245033264, |
|
"learning_rate": 2.6526421860705473e-05, |
|
"loss": 10.3281, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.20057306590257878, |
|
"grad_norm": 0.27470043301582336, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 10.3218, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2034383954154728, |
|
"grad_norm": 0.29296571016311646, |
|
"learning_rate": 2.350403678833976e-05, |
|
"loss": 10.3186, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.20630372492836677, |
|
"grad_norm": 0.335977166891098, |
|
"learning_rate": 2.2040354826462668e-05, |
|
"loss": 10.3168, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.20630372492836677, |
|
"eval_loss": 10.314079284667969, |
|
"eval_runtime": 1.025, |
|
"eval_samples_per_second": 286.826, |
|
"eval_steps_per_second": 36.097, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.20916905444126074, |
|
"grad_norm": 0.3692624270915985, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 10.3224, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.21203438395415472, |
|
"grad_norm": 0.31396710872650146, |
|
"learning_rate": 1.9216926233717085e-05, |
|
"loss": 10.3251, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.2148997134670487, |
|
"grad_norm": 0.32838553190231323, |
|
"learning_rate": 1.7860619515673033e-05, |
|
"loss": 10.3128, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2177650429799427, |
|
"grad_norm": 0.3165194094181061, |
|
"learning_rate": 1.6543469682057106e-05, |
|
"loss": 10.3204, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.22063037249283668, |
|
"grad_norm": 0.3308541476726532, |
|
"learning_rate": 1.526708147705013e-05, |
|
"loss": 10.3179, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.22349570200573066, |
|
"grad_norm": 0.3327239453792572, |
|
"learning_rate": 1.4033009983067452e-05, |
|
"loss": 10.312, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.22636103151862463, |
|
"grad_norm": 0.3000035583972931, |
|
"learning_rate": 1.2842758726130283e-05, |
|
"loss": 10.3087, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.22922636103151864, |
|
"grad_norm": 0.32877233624458313, |
|
"learning_rate": 1.1697777844051105e-05, |
|
"loss": 10.3161, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23209169054441262, |
|
"grad_norm": 0.3862011730670929, |
|
"learning_rate": 1.0599462319663905e-05, |
|
"loss": 10.3023, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.23209169054441262, |
|
"eval_loss": 10.311718940734863, |
|
"eval_runtime": 1.0872, |
|
"eval_samples_per_second": 270.408, |
|
"eval_steps_per_second": 34.031, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2349570200573066, |
|
"grad_norm": 0.33126261830329895, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 10.3185, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.23782234957020057, |
|
"grad_norm": 0.29596734046936035, |
|
"learning_rate": 8.548121372247918e-06, |
|
"loss": 10.3169, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.24068767908309455, |
|
"grad_norm": 0.32354721426963806, |
|
"learning_rate": 7.597595192178702e-06, |
|
"loss": 10.3139, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.24355300859598855, |
|
"grad_norm": 0.32435595989227295, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 10.3234, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.24641833810888253, |
|
"grad_norm": 0.3075031340122223, |
|
"learning_rate": 5.852620357053651e-06, |
|
"loss": 10.3194, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2492836676217765, |
|
"grad_norm": 0.38141223788261414, |
|
"learning_rate": 5.060297685041659e-06, |
|
"loss": 10.3027, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2521489971346705, |
|
"grad_norm": 0.3392947018146515, |
|
"learning_rate": 4.322727117869951e-06, |
|
"loss": 10.3089, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.25501432664756446, |
|
"grad_norm": 0.3004538118839264, |
|
"learning_rate": 3.6408072716606346e-06, |
|
"loss": 10.3138, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.25787965616045844, |
|
"grad_norm": 0.3904876410961151, |
|
"learning_rate": 3.0153689607045845e-06, |
|
"loss": 10.3068, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25787965616045844, |
|
"eval_loss": 10.310718536376953, |
|
"eval_runtime": 1.0615, |
|
"eval_samples_per_second": 276.979, |
|
"eval_steps_per_second": 34.858, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2607449856733524, |
|
"grad_norm": 0.3297453224658966, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 10.3213, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2636103151862464, |
|
"grad_norm": 0.35301482677459717, |
|
"learning_rate": 1.9369152030840556e-06, |
|
"loss": 10.3242, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2664756446991404, |
|
"grad_norm": 0.37583693861961365, |
|
"learning_rate": 1.4852136862001764e-06, |
|
"loss": 10.3206, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2693409742120344, |
|
"grad_norm": 0.4035826623439789, |
|
"learning_rate": 1.0926199633097157e-06, |
|
"loss": 10.2922, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2722063037249284, |
|
"grad_norm": 0.33306726813316345, |
|
"learning_rate": 7.596123493895991e-07, |
|
"loss": 10.314, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.27507163323782235, |
|
"grad_norm": 0.38546448945999146, |
|
"learning_rate": 4.865965629214819e-07, |
|
"loss": 10.3188, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.27793696275071633, |
|
"grad_norm": 0.3463270962238312, |
|
"learning_rate": 2.7390523158633554e-07, |
|
"loss": 10.3203, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2808022922636103, |
|
"grad_norm": 0.33884936571121216, |
|
"learning_rate": 1.2179748700879012e-07, |
|
"loss": 10.3175, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2836676217765043, |
|
"grad_norm": 0.34364816546440125, |
|
"learning_rate": 3.04586490452119e-08, |
|
"loss": 10.3123, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2836676217765043, |
|
"eval_loss": 10.31053352355957, |
|
"eval_runtime": 1.029, |
|
"eval_samples_per_second": 285.719, |
|
"eval_steps_per_second": 35.958, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.28653295128939826, |
|
"grad_norm": 0.3673644959926605, |
|
"learning_rate": 0.0, |
|
"loss": 10.3042, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5256395882496.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|