|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9751552795031055, |
|
"eval_steps": 41, |
|
"global_step": 322, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006211180124223602, |
|
"grad_norm": 0.3814874589443207, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.538, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006211180124223602, |
|
"eval_loss": 1.570106863975525, |
|
"eval_runtime": 26.7523, |
|
"eval_samples_per_second": 2.953, |
|
"eval_steps_per_second": 0.374, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012422360248447204, |
|
"grad_norm": 0.34790050983428955, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.5515, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.018633540372670808, |
|
"grad_norm": 0.315510630607605, |
|
"learning_rate": 3e-06, |
|
"loss": 1.6394, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.024844720496894408, |
|
"grad_norm": 0.333831787109375, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.5686, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.031055900621118012, |
|
"grad_norm": 0.3356265723705292, |
|
"learning_rate": 5e-06, |
|
"loss": 1.6442, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.037267080745341616, |
|
"grad_norm": 0.3302271366119385, |
|
"learning_rate": 6e-06, |
|
"loss": 1.6543, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.043478260869565216, |
|
"grad_norm": 0.3428245484828949, |
|
"learning_rate": 7e-06, |
|
"loss": 1.583, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.049689440993788817, |
|
"grad_norm": 0.3571615517139435, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.5863, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.055900621118012424, |
|
"grad_norm": 0.3609354496002197, |
|
"learning_rate": 9e-06, |
|
"loss": 1.5861, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.062111801242236024, |
|
"grad_norm": 0.38313618302345276, |
|
"learning_rate": 1e-05, |
|
"loss": 1.6357, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06832298136645963, |
|
"grad_norm": 0.3678596317768097, |
|
"learning_rate": 9.99974652980635e-06, |
|
"loss": 1.6452, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07453416149068323, |
|
"grad_norm": 0.36209872364997864, |
|
"learning_rate": 9.998986144924253e-06, |
|
"loss": 1.6364, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08074534161490683, |
|
"grad_norm": 0.3838407099246979, |
|
"learning_rate": 9.997718922447669e-06, |
|
"loss": 1.6456, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.08695652173913043, |
|
"grad_norm": 0.3796548545360565, |
|
"learning_rate": 9.995944990857848e-06, |
|
"loss": 1.5417, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.09316770186335403, |
|
"grad_norm": 0.393232524394989, |
|
"learning_rate": 9.993664530010308e-06, |
|
"loss": 1.5553, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.09937888198757763, |
|
"grad_norm": 0.40382516384124756, |
|
"learning_rate": 9.990877771116588e-06, |
|
"loss": 1.4974, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.10559006211180125, |
|
"grad_norm": 0.410980224609375, |
|
"learning_rate": 9.987584996720813e-06, |
|
"loss": 1.5143, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.11180124223602485, |
|
"grad_norm": 0.43435031175613403, |
|
"learning_rate": 9.983786540671052e-06, |
|
"loss": 1.7078, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11801242236024845, |
|
"grad_norm": 0.46852847933769226, |
|
"learning_rate": 9.979482788085455e-06, |
|
"loss": 1.5725, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.12422360248447205, |
|
"grad_norm": 0.44078120589256287, |
|
"learning_rate": 9.974674175313228e-06, |
|
"loss": 1.5146, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13043478260869565, |
|
"grad_norm": 0.4520568251609802, |
|
"learning_rate": 9.969361189890373e-06, |
|
"loss": 1.551, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.13664596273291926, |
|
"grad_norm": 0.44940102100372314, |
|
"learning_rate": 9.96354437049027e-06, |
|
"loss": 1.5154, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 0.42880144715309143, |
|
"learning_rate": 9.957224306869053e-06, |
|
"loss": 1.5664, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.14906832298136646, |
|
"grad_norm": 0.4296427071094513, |
|
"learning_rate": 9.950401639805822e-06, |
|
"loss": 1.5391, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.15527950310559005, |
|
"grad_norm": 0.4148462116718292, |
|
"learning_rate": 9.943077061037672e-06, |
|
"loss": 1.559, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16149068322981366, |
|
"grad_norm": 0.45027458667755127, |
|
"learning_rate": 9.935251313189564e-06, |
|
"loss": 1.5797, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.16770186335403728, |
|
"grad_norm": 0.43325772881507874, |
|
"learning_rate": 9.92692518969903e-06, |
|
"loss": 1.5651, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.17391304347826086, |
|
"grad_norm": 0.4363733232021332, |
|
"learning_rate": 9.91809953473572e-06, |
|
"loss": 1.4171, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.18012422360248448, |
|
"grad_norm": 0.4001283347606659, |
|
"learning_rate": 9.908775243115822e-06, |
|
"loss": 1.5172, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.18633540372670807, |
|
"grad_norm": 0.42918145656585693, |
|
"learning_rate": 9.89895326021134e-06, |
|
"loss": 1.4501, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.19254658385093168, |
|
"grad_norm": 0.3902086019515991, |
|
"learning_rate": 9.888634581854235e-06, |
|
"loss": 1.4396, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.19875776397515527, |
|
"grad_norm": 0.4069195091724396, |
|
"learning_rate": 9.87782025423547e-06, |
|
"loss": 1.4399, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.20496894409937888, |
|
"grad_norm": 0.40982964634895325, |
|
"learning_rate": 9.86651137379893e-06, |
|
"loss": 1.4445, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.2111801242236025, |
|
"grad_norm": 0.4249035716056824, |
|
"learning_rate": 9.854709087130261e-06, |
|
"loss": 1.4402, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.21739130434782608, |
|
"grad_norm": 0.4334266781806946, |
|
"learning_rate": 9.842414590840618e-06, |
|
"loss": 1.4633, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.2236024844720497, |
|
"grad_norm": 0.4241773188114166, |
|
"learning_rate": 9.829629131445342e-06, |
|
"loss": 1.5354, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.22981366459627328, |
|
"grad_norm": 0.45813798904418945, |
|
"learning_rate": 9.816354005237583e-06, |
|
"loss": 1.4568, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2360248447204969, |
|
"grad_norm": 0.4499090909957886, |
|
"learning_rate": 9.802590558156863e-06, |
|
"loss": 1.4483, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2422360248447205, |
|
"grad_norm": 0.44764548540115356, |
|
"learning_rate": 9.78834018565262e-06, |
|
"loss": 1.4769, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2484472049689441, |
|
"grad_norm": 0.4358612298965454, |
|
"learning_rate": 9.77360433254273e-06, |
|
"loss": 1.5545, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2546583850931677, |
|
"grad_norm": 0.44274434447288513, |
|
"learning_rate": 9.758384492867004e-06, |
|
"loss": 1.3615, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.2546583850931677, |
|
"eval_loss": 1.3996479511260986, |
|
"eval_runtime": 27.0472, |
|
"eval_samples_per_second": 2.921, |
|
"eval_steps_per_second": 0.37, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.2608695652173913, |
|
"grad_norm": 0.42516258358955383, |
|
"learning_rate": 9.742682209735727e-06, |
|
"loss": 1.4802, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2670807453416149, |
|
"grad_norm": 0.40934988856315613, |
|
"learning_rate": 9.726499075173201e-06, |
|
"loss": 1.3283, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2732919254658385, |
|
"grad_norm": 0.40728411078453064, |
|
"learning_rate": 9.709836729956326e-06, |
|
"loss": 1.4724, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2795031055900621, |
|
"grad_norm": 0.405512273311615, |
|
"learning_rate": 9.692696863448246e-06, |
|
"loss": 1.3943, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.4585839807987213, |
|
"learning_rate": 9.675081213427076e-06, |
|
"loss": 1.4038, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2919254658385093, |
|
"grad_norm": 0.40490880608558655, |
|
"learning_rate": 9.656991565909703e-06, |
|
"loss": 1.3807, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2981366459627329, |
|
"grad_norm": 0.4017808735370636, |
|
"learning_rate": 9.638429754970715e-06, |
|
"loss": 1.3532, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.30434782608695654, |
|
"grad_norm": 0.41484227776527405, |
|
"learning_rate": 9.619397662556434e-06, |
|
"loss": 1.3379, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.3105590062111801, |
|
"grad_norm": 0.36943531036376953, |
|
"learning_rate": 9.599897218294122e-06, |
|
"loss": 1.393, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3167701863354037, |
|
"grad_norm": 0.39943498373031616, |
|
"learning_rate": 9.57993039929633e-06, |
|
"loss": 1.3039, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.32298136645962733, |
|
"grad_norm": 0.374943345785141, |
|
"learning_rate": 9.55949922996045e-06, |
|
"loss": 1.355, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.32919254658385094, |
|
"grad_norm": 0.4175715446472168, |
|
"learning_rate": 9.538605781763464e-06, |
|
"loss": 1.3716, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.33540372670807456, |
|
"grad_norm": 0.37441951036453247, |
|
"learning_rate": 9.517252173051912e-06, |
|
"loss": 1.3081, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3416149068322981, |
|
"grad_norm": 0.39863765239715576, |
|
"learning_rate": 9.49544056882713e-06, |
|
"loss": 1.3942, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.34782608695652173, |
|
"grad_norm": 0.3854600787162781, |
|
"learning_rate": 9.473173180525737e-06, |
|
"loss": 1.2902, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.35403726708074534, |
|
"grad_norm": 0.42524421215057373, |
|
"learning_rate": 9.450452265795423e-06, |
|
"loss": 1.2544, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.36024844720496896, |
|
"grad_norm": 0.41367658972740173, |
|
"learning_rate": 9.427280128266049e-06, |
|
"loss": 1.3194, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.36645962732919257, |
|
"grad_norm": 0.3787434697151184, |
|
"learning_rate": 9.403659117316093e-06, |
|
"loss": 1.3077, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.37267080745341613, |
|
"grad_norm": 0.4197174608707428, |
|
"learning_rate": 9.37959162783444e-06, |
|
"loss": 1.2356, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.37888198757763975, |
|
"grad_norm": 0.39209407567977905, |
|
"learning_rate": 9.355080099977579e-06, |
|
"loss": 1.2168, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.38509316770186336, |
|
"grad_norm": 0.41228196024894714, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 1.2237, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.391304347826087, |
|
"grad_norm": 0.4241032302379608, |
|
"learning_rate": 9.3047349146132e-06, |
|
"loss": 1.3146, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.39751552795031053, |
|
"grad_norm": 0.4294179677963257, |
|
"learning_rate": 9.278906361507238e-06, |
|
"loss": 1.2716, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.40372670807453415, |
|
"grad_norm": 0.4411190450191498, |
|
"learning_rate": 9.252643978311649e-06, |
|
"loss": 1.2735, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.40993788819875776, |
|
"grad_norm": 0.46513664722442627, |
|
"learning_rate": 9.225950427718974e-06, |
|
"loss": 1.2337, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4161490683229814, |
|
"grad_norm": 0.36713191866874695, |
|
"learning_rate": 9.198828416136991e-06, |
|
"loss": 1.3339, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.422360248447205, |
|
"grad_norm": 0.351909875869751, |
|
"learning_rate": 9.171280693414307e-06, |
|
"loss": 1.358, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.3643895983695984, |
|
"learning_rate": 9.14331005256157e-06, |
|
"loss": 1.2482, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.43478260869565216, |
|
"grad_norm": 0.4360922873020172, |
|
"learning_rate": 9.114919329468283e-06, |
|
"loss": 1.3115, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4409937888198758, |
|
"grad_norm": 0.4254709482192993, |
|
"learning_rate": 9.086111402615274e-06, |
|
"loss": 1.2351, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.4472049689440994, |
|
"grad_norm": 0.3786623179912567, |
|
"learning_rate": 9.056889192782865e-06, |
|
"loss": 1.3746, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.453416149068323, |
|
"grad_norm": 0.4647882878780365, |
|
"learning_rate": 9.02725566275473e-06, |
|
"loss": 1.2395, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.45962732919254656, |
|
"grad_norm": 0.4040616452693939, |
|
"learning_rate": 8.997213817017508e-06, |
|
"loss": 1.2998, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.4658385093167702, |
|
"grad_norm": 0.41700279712677, |
|
"learning_rate": 8.966766701456177e-06, |
|
"loss": 1.2105, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4720496894409938, |
|
"grad_norm": 0.3908318877220154, |
|
"learning_rate": 8.935917403045251e-06, |
|
"loss": 1.2011, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.4782608695652174, |
|
"grad_norm": 0.38885021209716797, |
|
"learning_rate": 8.90466904953579e-06, |
|
"loss": 1.2471, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.484472049689441, |
|
"grad_norm": 0.42335832118988037, |
|
"learning_rate": 8.873024809138272e-06, |
|
"loss": 1.2683, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4906832298136646, |
|
"grad_norm": 0.39654576778411865, |
|
"learning_rate": 8.840987890201404e-06, |
|
"loss": 1.2587, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.4968944099378882, |
|
"grad_norm": 0.4123787581920624, |
|
"learning_rate": 8.808561540886796e-06, |
|
"loss": 1.1794, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5031055900621118, |
|
"grad_norm": 0.42227044701576233, |
|
"learning_rate": 8.77574904883967e-06, |
|
"loss": 1.2339, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.5093167701863354, |
|
"grad_norm": 0.4471132159233093, |
|
"learning_rate": 8.742553740855507e-06, |
|
"loss": 1.1954, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5093167701863354, |
|
"eval_loss": 1.2136852741241455, |
|
"eval_runtime": 26.7767, |
|
"eval_samples_per_second": 2.95, |
|
"eval_steps_per_second": 0.373, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.515527950310559, |
|
"grad_norm": 0.42590177059173584, |
|
"learning_rate": 8.708978982542766e-06, |
|
"loss": 1.2426, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.5217391304347826, |
|
"grad_norm": 0.39879706501960754, |
|
"learning_rate": 8.675028177981643e-06, |
|
"loss": 1.2618, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5279503105590062, |
|
"grad_norm": 0.4137211740016937, |
|
"learning_rate": 8.640704769378943e-06, |
|
"loss": 1.1735, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5341614906832298, |
|
"grad_norm": 0.42463183403015137, |
|
"learning_rate": 8.606012236719073e-06, |
|
"loss": 1.1433, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5403726708074534, |
|
"grad_norm": 0.42853713035583496, |
|
"learning_rate": 8.570954097411224e-06, |
|
"loss": 1.2307, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.546583850931677, |
|
"grad_norm": 0.408589243888855, |
|
"learning_rate": 8.535533905932739e-06, |
|
"loss": 1.1523, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5527950310559007, |
|
"grad_norm": 0.42884618043899536, |
|
"learning_rate": 8.499755253468732e-06, |
|
"loss": 1.2494, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5590062111801242, |
|
"grad_norm": 0.4813741445541382, |
|
"learning_rate": 8.463621767547998e-06, |
|
"loss": 1.2417, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5652173913043478, |
|
"grad_norm": 0.4021080732345581, |
|
"learning_rate": 8.4271371116752e-06, |
|
"loss": 1.2715, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.3832411468029022, |
|
"learning_rate": 8.390304984959455e-06, |
|
"loss": 1.1586, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.577639751552795, |
|
"grad_norm": 0.4360566735267639, |
|
"learning_rate": 8.35312912173928e-06, |
|
"loss": 1.217, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5838509316770186, |
|
"grad_norm": 0.44515809416770935, |
|
"learning_rate": 8.315613291203977e-06, |
|
"loss": 1.264, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5900621118012422, |
|
"grad_norm": 0.41051459312438965, |
|
"learning_rate": 8.277761297011475e-06, |
|
"loss": 1.1578, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5962732919254659, |
|
"grad_norm": 0.45815277099609375, |
|
"learning_rate": 8.239576976902694e-06, |
|
"loss": 1.2084, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6024844720496895, |
|
"grad_norm": 0.4373926520347595, |
|
"learning_rate": 8.20106420231244e-06, |
|
"loss": 1.1925, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.6086956521739131, |
|
"grad_norm": 0.444323867559433, |
|
"learning_rate": 8.162226877976886e-06, |
|
"loss": 1.2183, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6149068322981367, |
|
"grad_norm": 0.4503950774669647, |
|
"learning_rate": 8.123068941537681e-06, |
|
"loss": 1.1156, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.6211180124223602, |
|
"grad_norm": 0.4149700105190277, |
|
"learning_rate": 8.083594363142717e-06, |
|
"loss": 1.2379, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6273291925465838, |
|
"grad_norm": 0.472960501909256, |
|
"learning_rate": 8.043807145043604e-06, |
|
"loss": 1.14, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.6335403726708074, |
|
"grad_norm": 0.40744665265083313, |
|
"learning_rate": 8.003711321189895e-06, |
|
"loss": 1.2446, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.639751552795031, |
|
"grad_norm": 0.3995897173881531, |
|
"learning_rate": 7.963310956820085e-06, |
|
"loss": 1.179, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.6459627329192547, |
|
"grad_norm": 0.4376934766769409, |
|
"learning_rate": 7.922610148049445e-06, |
|
"loss": 1.2818, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6521739130434783, |
|
"grad_norm": 0.4294185936450958, |
|
"learning_rate": 7.881613021454728e-06, |
|
"loss": 1.1463, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6583850931677019, |
|
"grad_norm": 0.46748086810112, |
|
"learning_rate": 7.84032373365578e-06, |
|
"loss": 1.1702, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6645962732919255, |
|
"grad_norm": 0.47421255707740784, |
|
"learning_rate": 7.798746470894113e-06, |
|
"loss": 1.2922, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.6708074534161491, |
|
"grad_norm": 0.37985584139823914, |
|
"learning_rate": 7.75688544860846e-06, |
|
"loss": 1.1581, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6770186335403726, |
|
"grad_norm": 0.4336451590061188, |
|
"learning_rate": 7.714744911007395e-06, |
|
"loss": 1.1532, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.6832298136645962, |
|
"grad_norm": 0.4210762679576874, |
|
"learning_rate": 7.672329130639007e-06, |
|
"loss": 1.2047, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6894409937888198, |
|
"grad_norm": 0.42413201928138733, |
|
"learning_rate": 7.62964240795772e-06, |
|
"loss": 1.125, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6956521739130435, |
|
"grad_norm": 0.47521328926086426, |
|
"learning_rate": 7.586689070888284e-06, |
|
"loss": 1.2143, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7018633540372671, |
|
"grad_norm": 0.41499438881874084, |
|
"learning_rate": 7.543473474386962e-06, |
|
"loss": 1.07, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.7080745341614907, |
|
"grad_norm": 0.4027252495288849, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.262, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.4009539484977722, |
|
"learning_rate": 7.4562730554193875e-06, |
|
"loss": 1.1742, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7204968944099379, |
|
"grad_norm": 0.46428173780441284, |
|
"learning_rate": 7.412297074035968e-06, |
|
"loss": 1.1629, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7267080745341615, |
|
"grad_norm": 0.4374319016933441, |
|
"learning_rate": 7.368076514489947e-06, |
|
"loss": 1.1065, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.7329192546583851, |
|
"grad_norm": 0.42879152297973633, |
|
"learning_rate": 7.323615860218844e-06, |
|
"loss": 1.193, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7391304347826086, |
|
"grad_norm": 0.4889649748802185, |
|
"learning_rate": 7.2789196190029155e-06, |
|
"loss": 1.2618, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.7453416149068323, |
|
"grad_norm": 0.4490755796432495, |
|
"learning_rate": 7.2339923225081296e-06, |
|
"loss": 1.1857, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7515527950310559, |
|
"grad_norm": 0.4516308009624481, |
|
"learning_rate": 7.188838525826702e-06, |
|
"loss": 1.1076, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.7577639751552795, |
|
"grad_norm": 0.5366808176040649, |
|
"learning_rate": 7.143462807015271e-06, |
|
"loss": 1.1523, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.7639751552795031, |
|
"grad_norm": 0.4146997332572937, |
|
"learning_rate": 7.09786976663073e-06, |
|
"loss": 1.1766, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.7639751552795031, |
|
"eval_loss": 1.1551172733306885, |
|
"eval_runtime": 26.7368, |
|
"eval_samples_per_second": 2.955, |
|
"eval_steps_per_second": 0.374, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.7701863354037267, |
|
"grad_norm": 0.4490116536617279, |
|
"learning_rate": 7.052064027263785e-06, |
|
"loss": 1.1971, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.7763975155279503, |
|
"grad_norm": 0.48434358835220337, |
|
"learning_rate": 7.006050233070289e-06, |
|
"loss": 1.239, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.782608695652174, |
|
"grad_norm": 0.4746357798576355, |
|
"learning_rate": 6.959833049300376e-06, |
|
"loss": 1.1586, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.7888198757763976, |
|
"grad_norm": 0.39503100514411926, |
|
"learning_rate": 6.913417161825449e-06, |
|
"loss": 1.0894, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.7950310559006211, |
|
"grad_norm": 0.43696898221969604, |
|
"learning_rate": 6.8668072766631054e-06, |
|
"loss": 1.0384, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8012422360248447, |
|
"grad_norm": 0.4739917814731598, |
|
"learning_rate": 6.820008119499992e-06, |
|
"loss": 1.1741, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.8074534161490683, |
|
"grad_norm": 0.4144044518470764, |
|
"learning_rate": 6.773024435212678e-06, |
|
"loss": 1.1911, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8136645962732919, |
|
"grad_norm": 0.4699600636959076, |
|
"learning_rate": 6.7258609873865895e-06, |
|
"loss": 1.2041, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.8198757763975155, |
|
"grad_norm": 0.40221303701400757, |
|
"learning_rate": 6.678522557833025e-06, |
|
"loss": 1.1077, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8260869565217391, |
|
"grad_norm": 0.4720539152622223, |
|
"learning_rate": 6.631013946104348e-06, |
|
"loss": 1.0987, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.8322981366459627, |
|
"grad_norm": 0.4860423505306244, |
|
"learning_rate": 6.583339969007364e-06, |
|
"loss": 1.1455, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8385093167701864, |
|
"grad_norm": 0.5083861947059631, |
|
"learning_rate": 6.5355054601149545e-06, |
|
"loss": 1.1314, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.84472049689441, |
|
"grad_norm": 0.5092173218727112, |
|
"learning_rate": 6.487515269276015e-06, |
|
"loss": 1.2228, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8509316770186336, |
|
"grad_norm": 0.4544651508331299, |
|
"learning_rate": 6.439374262123731e-06, |
|
"loss": 1.1177, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.45371013879776, |
|
"learning_rate": 6.391087319582264e-06, |
|
"loss": 1.1782, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.8633540372670807, |
|
"grad_norm": 0.5025550723075867, |
|
"learning_rate": 6.342659337371884e-06, |
|
"loss": 1.1594, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.8695652173913043, |
|
"grad_norm": 0.48699814081192017, |
|
"learning_rate": 6.294095225512604e-06, |
|
"loss": 1.1246, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8757763975155279, |
|
"grad_norm": 0.442331999540329, |
|
"learning_rate": 6.2453999078263596e-06, |
|
"loss": 1.1265, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.8819875776397516, |
|
"grad_norm": 0.4538658857345581, |
|
"learning_rate": 6.1965783214377895e-06, |
|
"loss": 1.2054, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.8881987577639752, |
|
"grad_norm": 0.4796921908855438, |
|
"learning_rate": 6.147635416273679e-06, |
|
"loss": 1.1621, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.8944099378881988, |
|
"grad_norm": 0.4758358895778656, |
|
"learning_rate": 6.0985761545610865e-06, |
|
"loss": 1.117, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9006211180124224, |
|
"grad_norm": 0.44177138805389404, |
|
"learning_rate": 6.049405510324237e-06, |
|
"loss": 1.1016, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.906832298136646, |
|
"grad_norm": 0.48750707507133484, |
|
"learning_rate": 6.000128468880223e-06, |
|
"loss": 1.2238, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.9130434782608695, |
|
"grad_norm": 0.53321373462677, |
|
"learning_rate": 5.950750026333534e-06, |
|
"loss": 1.0768, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.9192546583850931, |
|
"grad_norm": 0.4829699993133545, |
|
"learning_rate": 5.90127518906953e-06, |
|
"loss": 1.1229, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9254658385093167, |
|
"grad_norm": 0.5378725528717041, |
|
"learning_rate": 5.851708973246841e-06, |
|
"loss": 1.076, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.9316770186335404, |
|
"grad_norm": 0.44234123826026917, |
|
"learning_rate": 5.8020564042888015e-06, |
|
"loss": 1.1543, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.937888198757764, |
|
"grad_norm": 0.41885337233543396, |
|
"learning_rate": 5.752322516373916e-06, |
|
"loss": 1.1918, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.9440993788819876, |
|
"grad_norm": 0.4357737600803375, |
|
"learning_rate": 5.7025123519254644e-06, |
|
"loss": 1.1822, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.9503105590062112, |
|
"grad_norm": 0.48525384068489075, |
|
"learning_rate": 5.65263096110026e-06, |
|
"loss": 1.1569, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.9565217391304348, |
|
"grad_norm": 0.4788309335708618, |
|
"learning_rate": 5.6026834012766155e-06, |
|
"loss": 1.1826, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.9627329192546584, |
|
"grad_norm": 0.4695623815059662, |
|
"learning_rate": 5.5526747365416e-06, |
|
"loss": 1.196, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.968944099378882, |
|
"grad_norm": 0.38793641328811646, |
|
"learning_rate": 5.502610037177586e-06, |
|
"loss": 1.0959, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.9751552795031055, |
|
"grad_norm": 0.4816080629825592, |
|
"learning_rate": 5.45249437914819e-06, |
|
"loss": 1.1359, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.9813664596273292, |
|
"grad_norm": 0.4505300223827362, |
|
"learning_rate": 5.402332843583631e-06, |
|
"loss": 1.0793, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.9875776397515528, |
|
"grad_norm": 0.5171789526939392, |
|
"learning_rate": 5.35213051626556e-06, |
|
"loss": 1.1582, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.9937888198757764, |
|
"grad_norm": 0.4746135473251343, |
|
"learning_rate": 5.301892487111431e-06, |
|
"loss": 1.13, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.5038459897041321, |
|
"learning_rate": 5.251623849658434e-06, |
|
"loss": 1.2033, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.0062111801242235, |
|
"grad_norm": 0.4865545332431793, |
|
"learning_rate": 5.201329700547077e-06, |
|
"loss": 1.2215, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.0124223602484472, |
|
"grad_norm": 0.44650915265083313, |
|
"learning_rate": 5.151015139004445e-06, |
|
"loss": 1.1592, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.0186335403726707, |
|
"grad_norm": 0.4772714376449585, |
|
"learning_rate": 5.100685266327202e-06, |
|
"loss": 1.1361, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.0186335403726707, |
|
"eval_loss": 1.1316967010498047, |
|
"eval_runtime": 26.8815, |
|
"eval_samples_per_second": 2.939, |
|
"eval_steps_per_second": 0.372, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.0248447204968945, |
|
"grad_norm": 0.4834270179271698, |
|
"learning_rate": 5.050345185364378e-06, |
|
"loss": 1.1859, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.0062111801242235, |
|
"grad_norm": 0.48713916540145874, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0621, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.0124223602484472, |
|
"grad_norm": 0.4767020642757416, |
|
"learning_rate": 4.949654814635623e-06, |
|
"loss": 1.0793, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.0186335403726707, |
|
"grad_norm": 0.4696958065032959, |
|
"learning_rate": 4.8993147336728e-06, |
|
"loss": 1.2284, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.0248447204968945, |
|
"grad_norm": 0.47341403365135193, |
|
"learning_rate": 4.848984860995557e-06, |
|
"loss": 1.1235, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.031055900621118, |
|
"grad_norm": 0.5151461958885193, |
|
"learning_rate": 4.798670299452926e-06, |
|
"loss": 1.1104, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0372670807453417, |
|
"grad_norm": 0.4465959668159485, |
|
"learning_rate": 4.748376150341566e-06, |
|
"loss": 1.106, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.0434782608695652, |
|
"grad_norm": 0.4610389769077301, |
|
"learning_rate": 4.69810751288857e-06, |
|
"loss": 1.2657, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.049689440993789, |
|
"grad_norm": 0.4607885479927063, |
|
"learning_rate": 4.647869483734441e-06, |
|
"loss": 1.1618, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.0559006211180124, |
|
"grad_norm": 0.4866524934768677, |
|
"learning_rate": 4.597667156416371e-06, |
|
"loss": 1.1848, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.062111801242236, |
|
"grad_norm": 0.4338420033454895, |
|
"learning_rate": 4.547505620851812e-06, |
|
"loss": 1.1223, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.0683229813664596, |
|
"grad_norm": 0.5134140849113464, |
|
"learning_rate": 4.497389962822416e-06, |
|
"loss": 1.1493, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.0745341614906831, |
|
"grad_norm": 0.5529913902282715, |
|
"learning_rate": 4.447325263458401e-06, |
|
"loss": 1.1222, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.0807453416149069, |
|
"grad_norm": 0.48266178369522095, |
|
"learning_rate": 4.397316598723385e-06, |
|
"loss": 1.2108, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 0.42973291873931885, |
|
"learning_rate": 4.347369038899744e-06, |
|
"loss": 1.1305, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.093167701863354, |
|
"grad_norm": 0.4676703214645386, |
|
"learning_rate": 4.297487648074538e-06, |
|
"loss": 1.2059, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0993788819875776, |
|
"grad_norm": 0.4928361773490906, |
|
"learning_rate": 4.247677483626085e-06, |
|
"loss": 1.1674, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.1055900621118013, |
|
"grad_norm": 0.526463508605957, |
|
"learning_rate": 4.1979435957111984e-06, |
|
"loss": 1.0983, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.1118012422360248, |
|
"grad_norm": 0.44327542185783386, |
|
"learning_rate": 4.148291026753159e-06, |
|
"loss": 1.1574, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.1180124223602483, |
|
"grad_norm": 0.4704626798629761, |
|
"learning_rate": 4.098724810930472e-06, |
|
"loss": 1.1953, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.124223602484472, |
|
"grad_norm": 0.43922385573387146, |
|
"learning_rate": 4.049249973666468e-06, |
|
"loss": 1.1376, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.1304347826086956, |
|
"grad_norm": 0.462429404258728, |
|
"learning_rate": 3.999871531119779e-06, |
|
"loss": 1.0687, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.1366459627329193, |
|
"grad_norm": 0.47191864252090454, |
|
"learning_rate": 3.9505944896757635e-06, |
|
"loss": 1.139, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.46805888414382935, |
|
"learning_rate": 3.901423845438916e-06, |
|
"loss": 1.1135, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.1490683229813665, |
|
"grad_norm": 0.4984455704689026, |
|
"learning_rate": 3.852364583726324e-06, |
|
"loss": 1.0954, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.15527950310559, |
|
"grad_norm": 0.5003888010978699, |
|
"learning_rate": 3.803421678562213e-06, |
|
"loss": 1.1432, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1614906832298137, |
|
"grad_norm": 0.5040602087974548, |
|
"learning_rate": 3.7546000921736413e-06, |
|
"loss": 1.1675, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.1677018633540373, |
|
"grad_norm": 0.45909303426742554, |
|
"learning_rate": 3.705904774487396e-06, |
|
"loss": 1.1865, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.1739130434782608, |
|
"grad_norm": 0.4990633428096771, |
|
"learning_rate": 3.657340662628116e-06, |
|
"loss": 1.1393, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.1801242236024845, |
|
"grad_norm": 0.4696108400821686, |
|
"learning_rate": 3.6089126804177373e-06, |
|
"loss": 1.1307, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.186335403726708, |
|
"grad_norm": 0.5014775395393372, |
|
"learning_rate": 3.56062573787627e-06, |
|
"loss": 1.137, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1925465838509317, |
|
"grad_norm": 0.46463677287101746, |
|
"learning_rate": 3.5124847307239863e-06, |
|
"loss": 1.1561, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.1987577639751552, |
|
"grad_norm": 0.43704545497894287, |
|
"learning_rate": 3.464494539885047e-06, |
|
"loss": 1.1201, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.204968944099379, |
|
"grad_norm": 0.445740669965744, |
|
"learning_rate": 3.416660030992639e-06, |
|
"loss": 1.1516, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.2111801242236024, |
|
"grad_norm": 0.5268468856811523, |
|
"learning_rate": 3.3689860538956547e-06, |
|
"loss": 1.1288, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.2173913043478262, |
|
"grad_norm": 0.4934462606906891, |
|
"learning_rate": 3.3214774421669777e-06, |
|
"loss": 1.1642, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2236024844720497, |
|
"grad_norm": 0.5016915202140808, |
|
"learning_rate": 3.274139012613411e-06, |
|
"loss": 1.0885, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.2298136645962732, |
|
"grad_norm": 0.4747258722782135, |
|
"learning_rate": 3.226975564787322e-06, |
|
"loss": 1.1381, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.236024844720497, |
|
"grad_norm": 0.522182285785675, |
|
"learning_rate": 3.1799918805000097e-06, |
|
"loss": 1.0668, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.2422360248447206, |
|
"grad_norm": 0.5038187503814697, |
|
"learning_rate": 3.1331927233368954e-06, |
|
"loss": 1.0818, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.2484472049689441, |
|
"grad_norm": 0.46231281757354736, |
|
"learning_rate": 3.0865828381745515e-06, |
|
"loss": 1.0431, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.2484472049689441, |
|
"eval_loss": 1.1210699081420898, |
|
"eval_runtime": 26.9725, |
|
"eval_samples_per_second": 2.929, |
|
"eval_steps_per_second": 0.371, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.2546583850931676, |
|
"grad_norm": 0.5070775747299194, |
|
"learning_rate": 3.040166950699626e-06, |
|
"loss": 1.1643, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.2608695652173914, |
|
"grad_norm": 0.5011193752288818, |
|
"learning_rate": 2.993949766929711e-06, |
|
"loss": 1.0472, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.2670807453416149, |
|
"grad_norm": 0.4393374025821686, |
|
"learning_rate": 2.947935972736217e-06, |
|
"loss": 1.0875, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.2732919254658386, |
|
"grad_norm": 0.46959808468818665, |
|
"learning_rate": 2.9021302333692738e-06, |
|
"loss": 1.1078, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.279503105590062, |
|
"grad_norm": 0.4769798219203949, |
|
"learning_rate": 2.8565371929847286e-06, |
|
"loss": 1.16, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 0.4200010895729065, |
|
"learning_rate": 2.8111614741732975e-06, |
|
"loss": 1.1258, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.2919254658385093, |
|
"grad_norm": 0.502759575843811, |
|
"learning_rate": 2.766007677491871e-06, |
|
"loss": 1.1263, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.298136645962733, |
|
"grad_norm": 0.48976272344589233, |
|
"learning_rate": 2.7210803809970853e-06, |
|
"loss": 1.174, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.3043478260869565, |
|
"grad_norm": 0.582251787185669, |
|
"learning_rate": 2.6763841397811576e-06, |
|
"loss": 1.1542, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.31055900621118, |
|
"grad_norm": 0.525684654712677, |
|
"learning_rate": 2.631923485510054e-06, |
|
"loss": 1.1594, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.3167701863354038, |
|
"grad_norm": 0.5200883150100708, |
|
"learning_rate": 2.587702925964034e-06, |
|
"loss": 1.1374, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.3229813664596273, |
|
"grad_norm": 0.4962819218635559, |
|
"learning_rate": 2.5437269445806146e-06, |
|
"loss": 1.0839, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.329192546583851, |
|
"grad_norm": 0.5223807692527771, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 1.0708, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.3354037267080745, |
|
"grad_norm": 0.434766560792923, |
|
"learning_rate": 2.4565265256130393e-06, |
|
"loss": 1.1387, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.341614906832298, |
|
"grad_norm": 0.4618338346481323, |
|
"learning_rate": 2.4133109291117156e-06, |
|
"loss": 1.0728, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.3478260869565217, |
|
"grad_norm": 0.5078674554824829, |
|
"learning_rate": 2.3703575920422793e-06, |
|
"loss": 1.1649, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.3540372670807455, |
|
"grad_norm": 0.5160240530967712, |
|
"learning_rate": 2.3276708693609947e-06, |
|
"loss": 1.1226, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.360248447204969, |
|
"grad_norm": 0.44181424379348755, |
|
"learning_rate": 2.2852550889926067e-06, |
|
"loss": 1.0961, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.3664596273291925, |
|
"grad_norm": 0.4639110863208771, |
|
"learning_rate": 2.243114551391542e-06, |
|
"loss": 1.0937, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.3726708074534162, |
|
"grad_norm": 0.44535526633262634, |
|
"learning_rate": 2.20125352910589e-06, |
|
"loss": 1.1514, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.3788819875776397, |
|
"grad_norm": 0.463021844625473, |
|
"learning_rate": 2.159676266344222e-06, |
|
"loss": 1.1019, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.3850931677018634, |
|
"grad_norm": 0.494054913520813, |
|
"learning_rate": 2.1183869785452744e-06, |
|
"loss": 1.1175, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.391304347826087, |
|
"grad_norm": 0.5025504231452942, |
|
"learning_rate": 2.077389851950557e-06, |
|
"loss": 1.0575, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.3975155279503104, |
|
"grad_norm": 0.46806544065475464, |
|
"learning_rate": 2.036689043179917e-06, |
|
"loss": 1.1058, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.4037267080745341, |
|
"grad_norm": 0.4613620638847351, |
|
"learning_rate": 1.996288678810105e-06, |
|
"loss": 1.1508, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4099378881987579, |
|
"grad_norm": 0.5427222847938538, |
|
"learning_rate": 1.956192854956397e-06, |
|
"loss": 1.2126, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.4161490683229814, |
|
"grad_norm": 0.4144895374774933, |
|
"learning_rate": 1.9164056368572847e-06, |
|
"loss": 1.0932, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.4223602484472049, |
|
"grad_norm": 0.47324222326278687, |
|
"learning_rate": 1.87693105846232e-06, |
|
"loss": 1.075, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.4890088737010956, |
|
"learning_rate": 1.8377731220231144e-06, |
|
"loss": 1.1909, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.434782608695652, |
|
"grad_norm": 0.5050180554389954, |
|
"learning_rate": 1.7989357976875603e-06, |
|
"loss": 1.0406, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.4409937888198758, |
|
"grad_norm": 0.5166496634483337, |
|
"learning_rate": 1.7604230230973068e-06, |
|
"loss": 1.1579, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.4472049689440993, |
|
"grad_norm": 0.5063875913619995, |
|
"learning_rate": 1.7222387029885268e-06, |
|
"loss": 1.0532, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.453416149068323, |
|
"grad_norm": 0.43411558866500854, |
|
"learning_rate": 1.6843867087960252e-06, |
|
"loss": 1.1798, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.4596273291925466, |
|
"grad_norm": 0.5085105895996094, |
|
"learning_rate": 1.6468708782607213e-06, |
|
"loss": 1.0451, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.4658385093167703, |
|
"grad_norm": 0.5479459166526794, |
|
"learning_rate": 1.6096950150405454e-06, |
|
"loss": 1.1467, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4720496894409938, |
|
"grad_norm": 0.5029129981994629, |
|
"learning_rate": 1.572862888324801e-06, |
|
"loss": 1.1512, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.4782608695652173, |
|
"grad_norm": 0.4606218934059143, |
|
"learning_rate": 1.5363782324520033e-06, |
|
"loss": 1.1006, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.484472049689441, |
|
"grad_norm": 0.4794401526451111, |
|
"learning_rate": 1.5002447465312675e-06, |
|
"loss": 1.0808, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.4906832298136645, |
|
"grad_norm": 0.49627459049224854, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 1.1399, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.4968944099378882, |
|
"grad_norm": 0.5150241851806641, |
|
"learning_rate": 1.4290459025887771e-06, |
|
"loss": 1.1315, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.5031055900621118, |
|
"grad_norm": 0.5009984374046326, |
|
"learning_rate": 1.3939877632809279e-06, |
|
"loss": 1.1501, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.5031055900621118, |
|
"eval_loss": 1.1153790950775146, |
|
"eval_runtime": 26.9095, |
|
"eval_samples_per_second": 2.936, |
|
"eval_steps_per_second": 0.372, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.5093167701863353, |
|
"grad_norm": 0.5049424767494202, |
|
"learning_rate": 1.3592952306210589e-06, |
|
"loss": 1.1243, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.515527950310559, |
|
"grad_norm": 0.5412471890449524, |
|
"learning_rate": 1.3249718220183583e-06, |
|
"loss": 1.1214, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.5217391304347827, |
|
"grad_norm": 0.5261203050613403, |
|
"learning_rate": 1.2910210174572346e-06, |
|
"loss": 1.0369, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.5279503105590062, |
|
"grad_norm": 0.47771474719047546, |
|
"learning_rate": 1.257446259144494e-06, |
|
"loss": 1.0598, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5341614906832297, |
|
"grad_norm": 0.4789620339870453, |
|
"learning_rate": 1.2242509511603318e-06, |
|
"loss": 1.0653, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.5403726708074534, |
|
"grad_norm": 0.5012646317481995, |
|
"learning_rate": 1.1914384591132045e-06, |
|
"loss": 1.0909, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.5465838509316772, |
|
"grad_norm": 0.49552974104881287, |
|
"learning_rate": 1.159012109798598e-06, |
|
"loss": 1.1051, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.5527950310559007, |
|
"grad_norm": 0.48969194293022156, |
|
"learning_rate": 1.1269751908617277e-06, |
|
"loss": 1.2151, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.5590062111801242, |
|
"grad_norm": 0.5141896605491638, |
|
"learning_rate": 1.095330950464213e-06, |
|
"loss": 1.1452, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.5652173913043477, |
|
"grad_norm": 0.5273729562759399, |
|
"learning_rate": 1.0640825969547498e-06, |
|
"loss": 1.1325, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 0.5029155015945435, |
|
"learning_rate": 1.0332332985438248e-06, |
|
"loss": 1.0992, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.5776397515527951, |
|
"grad_norm": 0.47117024660110474, |
|
"learning_rate": 1.0027861829824953e-06, |
|
"loss": 1.1461, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.5838509316770186, |
|
"grad_norm": 0.4335295855998993, |
|
"learning_rate": 9.7274433724527e-07, |
|
"loss": 1.0592, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.5900621118012421, |
|
"grad_norm": 0.49043720960617065, |
|
"learning_rate": 9.431108072171346e-07, |
|
"loss": 1.1516, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5962732919254659, |
|
"grad_norm": 0.5176280736923218, |
|
"learning_rate": 9.138885973847262e-07, |
|
"loss": 1.168, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.6024844720496896, |
|
"grad_norm": 0.4628904461860657, |
|
"learning_rate": 8.850806705317183e-07, |
|
"loss": 1.0914, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.608695652173913, |
|
"grad_norm": 0.4468037784099579, |
|
"learning_rate": 8.566899474384299e-07, |
|
"loss": 1.0685, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.6149068322981366, |
|
"grad_norm": 0.5194827318191528, |
|
"learning_rate": 8.287193065856936e-07, |
|
"loss": 1.1787, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.62111801242236, |
|
"grad_norm": 0.48394885659217834, |
|
"learning_rate": 8.011715838630107e-07, |
|
"loss": 1.2196, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.6273291925465838, |
|
"grad_norm": 0.486788809299469, |
|
"learning_rate": 7.740495722810271e-07, |
|
"loss": 1.1354, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.6335403726708075, |
|
"grad_norm": 0.5033959150314331, |
|
"learning_rate": 7.473560216883524e-07, |
|
"loss": 1.13, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.639751552795031, |
|
"grad_norm": 0.4517529606819153, |
|
"learning_rate": 7.210936384927631e-07, |
|
"loss": 1.1115, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.6459627329192545, |
|
"grad_norm": 0.5163289904594421, |
|
"learning_rate": 6.952650853867993e-07, |
|
"loss": 1.1724, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.6521739130434783, |
|
"grad_norm": 0.5138719081878662, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 1.1498, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.658385093167702, |
|
"grad_norm": 0.5368619561195374, |
|
"learning_rate": 6.449199000224221e-07, |
|
"loss": 1.1271, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.6645962732919255, |
|
"grad_norm": 0.46004772186279297, |
|
"learning_rate": 6.204083721655607e-07, |
|
"loss": 1.0344, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.670807453416149, |
|
"grad_norm": 0.4616802930831909, |
|
"learning_rate": 5.963408826839079e-07, |
|
"loss": 1.1346, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.6770186335403725, |
|
"grad_norm": 0.514466404914856, |
|
"learning_rate": 5.727198717339511e-07, |
|
"loss": 1.1606, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.6832298136645962, |
|
"grad_norm": 0.4969620406627655, |
|
"learning_rate": 5.49547734204578e-07, |
|
"loss": 1.0281, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.68944099378882, |
|
"grad_norm": 0.5189189314842224, |
|
"learning_rate": 5.268268194742638e-07, |
|
"loss": 1.1554, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.6956521739130435, |
|
"grad_norm": 0.49855127930641174, |
|
"learning_rate": 5.045594311728708e-07, |
|
"loss": 1.1224, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.701863354037267, |
|
"grad_norm": 0.4984905421733856, |
|
"learning_rate": 4.827478269480895e-07, |
|
"loss": 1.0951, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.7080745341614907, |
|
"grad_norm": 0.5486929416656494, |
|
"learning_rate": 4.6139421823653716e-07, |
|
"loss": 1.1237, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 0.4911029636859894, |
|
"learning_rate": 4.405007700395497e-07, |
|
"loss": 1.0601, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.720496894409938, |
|
"grad_norm": 0.4640960991382599, |
|
"learning_rate": 4.200696007036703e-07, |
|
"loss": 1.059, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.7267080745341614, |
|
"grad_norm": 0.48524951934814453, |
|
"learning_rate": 4.001027817058789e-07, |
|
"loss": 1.1238, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.7329192546583851, |
|
"grad_norm": 0.46812015771865845, |
|
"learning_rate": 3.8060233744356634e-07, |
|
"loss": 1.2098, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.7391304347826086, |
|
"grad_norm": 0.4788132309913635, |
|
"learning_rate": 3.615702450292857e-07, |
|
"loss": 1.1629, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.7453416149068324, |
|
"grad_norm": 0.47269925475120544, |
|
"learning_rate": 3.430084340902973e-07, |
|
"loss": 1.1628, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.7515527950310559, |
|
"grad_norm": 0.5449272394180298, |
|
"learning_rate": 3.2491878657292643e-07, |
|
"loss": 1.1587, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.7577639751552794, |
|
"grad_norm": 0.46690839529037476, |
|
"learning_rate": 3.0730313655175647e-07, |
|
"loss": 1.2015, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.7577639751552794, |
|
"eval_loss": 1.1139311790466309, |
|
"eval_runtime": 27.0876, |
|
"eval_samples_per_second": 2.916, |
|
"eval_steps_per_second": 0.369, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.763975155279503, |
|
"grad_norm": 0.570745050907135, |
|
"learning_rate": 2.901632700436757e-07, |
|
"loss": 1.1747, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.7701863354037268, |
|
"grad_norm": 0.5287693738937378, |
|
"learning_rate": 2.7350092482679836e-07, |
|
"loss": 1.0633, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.7763975155279503, |
|
"grad_norm": 0.4709635376930237, |
|
"learning_rate": 2.573177902642726e-07, |
|
"loss": 1.1393, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.7826086956521738, |
|
"grad_norm": 0.49392980337142944, |
|
"learning_rate": 2.416155071329973e-07, |
|
"loss": 1.0933, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.7888198757763976, |
|
"grad_norm": 0.5224959850311279, |
|
"learning_rate": 2.2639566745727203e-07, |
|
"loss": 1.1316, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.795031055900621, |
|
"grad_norm": 0.4887482821941376, |
|
"learning_rate": 2.1165981434738025e-07, |
|
"loss": 1.1066, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.8012422360248448, |
|
"grad_norm": 0.4881725013256073, |
|
"learning_rate": 1.9740944184313882e-07, |
|
"loss": 1.032, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.8074534161490683, |
|
"grad_norm": 0.47561758756637573, |
|
"learning_rate": 1.8364599476241862e-07, |
|
"loss": 1.1154, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.8136645962732918, |
|
"grad_norm": 0.4982871413230896, |
|
"learning_rate": 1.7037086855465902e-07, |
|
"loss": 1.1942, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.8198757763975155, |
|
"grad_norm": 0.7219922542572021, |
|
"learning_rate": 1.575854091593837e-07, |
|
"loss": 1.0776, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.8260869565217392, |
|
"grad_norm": 0.5696266293525696, |
|
"learning_rate": 1.4529091286973994e-07, |
|
"loss": 1.1663, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.8322981366459627, |
|
"grad_norm": 0.45875245332717896, |
|
"learning_rate": 1.3348862620107038e-07, |
|
"loss": 1.1009, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.8385093167701863, |
|
"grad_norm": 0.4918197989463806, |
|
"learning_rate": 1.2217974576453072e-07, |
|
"loss": 1.1357, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.84472049689441, |
|
"grad_norm": 0.5131499171257019, |
|
"learning_rate": 1.1136541814576574e-07, |
|
"loss": 1.1376, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.8509316770186337, |
|
"grad_norm": 0.49699532985687256, |
|
"learning_rate": 1.0104673978866164e-07, |
|
"loss": 1.231, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 0.5739893913269043, |
|
"learning_rate": 9.122475688417954e-08, |
|
"loss": 1.1645, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.8633540372670807, |
|
"grad_norm": 0.4959266781806946, |
|
"learning_rate": 8.190046526428241e-08, |
|
"loss": 1.1419, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.8695652173913042, |
|
"grad_norm": 0.5045937299728394, |
|
"learning_rate": 7.307481030097152e-08, |
|
"loss": 1.1859, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.875776397515528, |
|
"grad_norm": 0.47639361023902893, |
|
"learning_rate": 6.474868681043578e-08, |
|
"loss": 1.1026, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.8819875776397517, |
|
"grad_norm": 0.5153087973594666, |
|
"learning_rate": 5.6922938962329364e-08, |
|
"loss": 1.1612, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.8881987577639752, |
|
"grad_norm": 0.4941299855709076, |
|
"learning_rate": 4.959836019417963e-08, |
|
"loss": 1.0876, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.8944099378881987, |
|
"grad_norm": 0.4527961015701294, |
|
"learning_rate": 4.2775693130948094e-08, |
|
"loss": 1.1657, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.9006211180124224, |
|
"grad_norm": 0.49893486499786377, |
|
"learning_rate": 3.645562950973014e-08, |
|
"loss": 1.1261, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.9068322981366461, |
|
"grad_norm": 0.498981773853302, |
|
"learning_rate": 3.063881010962611e-08, |
|
"loss": 1.1179, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.9130434782608696, |
|
"grad_norm": 0.5112736225128174, |
|
"learning_rate": 2.5325824686772138e-08, |
|
"loss": 1.1863, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.9192546583850931, |
|
"grad_norm": 0.539107620716095, |
|
"learning_rate": 2.0517211914545254e-08, |
|
"loss": 1.0501, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.9254658385093166, |
|
"grad_norm": 0.514134407043457, |
|
"learning_rate": 1.6213459328950355e-08, |
|
"loss": 1.1264, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.9316770186335404, |
|
"grad_norm": 0.5193644165992737, |
|
"learning_rate": 1.2415003279186988e-08, |
|
"loss": 1.1484, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.937888198757764, |
|
"grad_norm": 0.4869459867477417, |
|
"learning_rate": 9.12222888341252e-09, |
|
"loss": 1.1454, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.9440993788819876, |
|
"grad_norm": 0.4767916798591614, |
|
"learning_rate": 6.335469989692255e-09, |
|
"loss": 1.2203, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.950310559006211, |
|
"grad_norm": 0.5365714430809021, |
|
"learning_rate": 4.055009142152066e-09, |
|
"loss": 1.252, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.9565217391304348, |
|
"grad_norm": 0.4672715961933136, |
|
"learning_rate": 2.2810775523329775e-09, |
|
"loss": 1.134, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.9627329192546585, |
|
"grad_norm": 0.45685145258903503, |
|
"learning_rate": 1.0138550757493592e-09, |
|
"loss": 1.1381, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.968944099378882, |
|
"grad_norm": 0.49931174516677856, |
|
"learning_rate": 2.534701936512951e-10, |
|
"loss": 1.1142, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.9751552795031055, |
|
"grad_norm": 0.4903966188430786, |
|
"learning_rate": 0.0, |
|
"loss": 1.0961, |
|
"step": 322 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 322, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 81, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7811992452614062e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |