{ "best_metric": 1.498180627822876, "best_model_checkpoint": "saves/Gemma-2B/lora/train_2024-03-01-04-36-32/checkpoint-1400", "epoch": 2.488888888888889, "eval_steps": 100, "global_step": 1400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 6.6079277992248535, "learning_rate": 4.999960939662063e-05, "loss": 3.747, "step": 10 }, { "epoch": 0.04, "grad_norm": 3.2283411026000977, "learning_rate": 4.999843759868819e-05, "loss": 3.5789, "step": 20 }, { "epoch": 0.05, "grad_norm": 41.573001861572266, "learning_rate": 4.999648464281934e-05, "loss": 3.1683, "step": 30 }, { "epoch": 0.07, "grad_norm": 4.080965518951416, "learning_rate": 4.9993750590040575e-05, "loss": 2.8275, "step": 40 }, { "epoch": 0.09, "grad_norm": 4.576275825500488, "learning_rate": 4.999023552578632e-05, "loss": 2.6758, "step": 50 }, { "epoch": 0.11, "grad_norm": 18.012842178344727, "learning_rate": 4.998593955989626e-05, "loss": 2.6287, "step": 60 }, { "epoch": 0.12, "grad_norm": 5.738934516906738, "learning_rate": 4.9980862826611875e-05, "loss": 2.5284, "step": 70 }, { "epoch": 0.14, "grad_norm": 3.353776216506958, "learning_rate": 4.9975005484572305e-05, "loss": 2.2608, "step": 80 }, { "epoch": 0.16, "grad_norm": 4.6298699378967285, "learning_rate": 4.9968367716809374e-05, "loss": 2.2475, "step": 90 }, { "epoch": 0.18, "grad_norm": 50.594207763671875, "learning_rate": 4.996094973074183e-05, "loss": 2.2007, "step": 100 }, { "epoch": 0.18, "eval_loss": 2.126384735107422, "eval_runtime": 124.9221, "eval_samples_per_second": 8.005, "eval_steps_per_second": 2.001, "step": 100 }, { "epoch": 0.2, "grad_norm": 10.225520133972168, "learning_rate": 4.995275175816891e-05, "loss": 1.9414, "step": 110 }, { "epoch": 0.21, "grad_norm": 4.777626991271973, "learning_rate": 4.994377405526308e-05, "loss": 1.9729, "step": 120 }, { "epoch": 0.23, "grad_norm": 6.133576393127441, "learning_rate": 4.993401690256203e-05, "loss": 2.0237, "step": 130 }, { "epoch": 0.25, "grad_norm": 5.396271228790283, "learning_rate": 4.992348060495989e-05, "loss": 2.009, "step": 140 }, { "epoch": 0.27, "grad_norm": 3.4974453449249268, "learning_rate": 4.991216549169776e-05, "loss": 2.032, "step": 150 }, { "epoch": 0.28, "grad_norm": 12.256199836730957, "learning_rate": 4.990007191635334e-05, "loss": 1.9548, "step": 160 }, { "epoch": 0.3, "grad_norm": 7.5634379386901855, "learning_rate": 4.988720025682995e-05, "loss": 1.8164, "step": 170 }, { "epoch": 0.32, "grad_norm": 14.023727416992188, "learning_rate": 4.987355091534468e-05, "loss": 1.8517, "step": 180 }, { "epoch": 0.34, "grad_norm": 4.622091293334961, "learning_rate": 4.985912431841584e-05, "loss": 2.0255, "step": 190 }, { "epoch": 0.36, "grad_norm": 3.9935083389282227, "learning_rate": 4.9843920916849645e-05, "loss": 1.8777, "step": 200 }, { "epoch": 0.36, "eval_loss": 1.8619400262832642, "eval_runtime": 124.8712, "eval_samples_per_second": 8.008, "eval_steps_per_second": 2.002, "step": 200 }, { "epoch": 0.37, "grad_norm": 6.256485939025879, "learning_rate": 4.982794118572609e-05, "loss": 1.8885, "step": 210 }, { "epoch": 0.39, "grad_norm": 13.212824821472168, "learning_rate": 4.981118562438414e-05, "loss": 1.7744, "step": 220 }, { "epoch": 0.41, "grad_norm": 4.2626118659973145, "learning_rate": 4.9793654756406085e-05, "loss": 1.7545, "step": 230 }, { "epoch": 0.43, "grad_norm": 4.217405796051025, "learning_rate": 4.9775349129601243e-05, "loss": 1.5633, "step": 240 }, { "epoch": 0.44, "grad_norm": 22.393404006958008, "learning_rate": 4.9756269315988804e-05, "loss": 1.8871, "step": 250 }, { "epoch": 0.46, "grad_norm": 3.6576473712921143, "learning_rate": 4.973641591177991e-05, "loss": 1.7037, "step": 260 }, { "epoch": 0.48, "grad_norm": 4.2433271408081055, "learning_rate": 4.971578953735912e-05, "loss": 1.7631, "step": 270 }, { "epoch": 0.5, "grad_norm": 3.7399721145629883, "learning_rate": 4.969439083726496e-05, "loss": 1.7714, "step": 280 }, { "epoch": 0.52, "grad_norm": 4.575680255889893, "learning_rate": 4.967222048016979e-05, "loss": 1.8699, "step": 290 }, { "epoch": 0.53, "grad_norm": 7.729683876037598, "learning_rate": 4.964927915885893e-05, "loss": 1.6566, "step": 300 }, { "epoch": 0.53, "eval_loss": 1.7350378036499023, "eval_runtime": 124.9278, "eval_samples_per_second": 8.005, "eval_steps_per_second": 2.001, "step": 300 }, { "epoch": 0.55, "grad_norm": 2.755899667739868, "learning_rate": 4.962556759020898e-05, "loss": 1.7193, "step": 310 }, { "epoch": 0.57, "grad_norm": 3.513024091720581, "learning_rate": 4.960108651516545e-05, "loss": 1.852, "step": 320 }, { "epoch": 0.59, "grad_norm": 3.7794790267944336, "learning_rate": 4.9575836698719605e-05, "loss": 1.6785, "step": 330 }, { "epoch": 0.6, "grad_norm": 3.2256739139556885, "learning_rate": 4.954981892988451e-05, "loss": 1.6648, "step": 340 }, { "epoch": 0.62, "grad_norm": 2.8756954669952393, "learning_rate": 4.952303402167047e-05, "loss": 1.6399, "step": 350 }, { "epoch": 0.64, "grad_norm": 7.057961463928223, "learning_rate": 4.949548281105951e-05, "loss": 1.5875, "step": 360 }, { "epoch": 0.66, "grad_norm": 4.63081169128418, "learning_rate": 4.946716615897932e-05, "loss": 1.6708, "step": 370 }, { "epoch": 0.68, "grad_norm": 8.755204200744629, "learning_rate": 4.943808495027631e-05, "loss": 1.636, "step": 380 }, { "epoch": 0.69, "grad_norm": 10.21866226196289, "learning_rate": 4.940824009368793e-05, "loss": 1.5714, "step": 390 }, { "epoch": 0.71, "grad_norm": 5.44133186340332, "learning_rate": 4.937763252181434e-05, "loss": 1.4084, "step": 400 }, { "epoch": 0.71, "eval_loss": 1.6840696334838867, "eval_runtime": 124.8851, "eval_samples_per_second": 8.007, "eval_steps_per_second": 2.002, "step": 400 }, { "epoch": 0.73, "grad_norm": 3.056345224380493, "learning_rate": 4.934626319108923e-05, "loss": 1.7233, "step": 410 }, { "epoch": 0.75, "grad_norm": 4.303133487701416, "learning_rate": 4.93141330817499e-05, "loss": 1.5374, "step": 420 }, { "epoch": 0.76, "grad_norm": 5.2246623039245605, "learning_rate": 4.9281243197806726e-05, "loss": 1.8547, "step": 430 }, { "epoch": 0.78, "grad_norm": 3.8070685863494873, "learning_rate": 4.924759456701167e-05, "loss": 1.5721, "step": 440 }, { "epoch": 0.8, "grad_norm": 3.243337392807007, "learning_rate": 4.9213188240826245e-05, "loss": 1.4322, "step": 450 }, { "epoch": 0.82, "grad_norm": 4.166132926940918, "learning_rate": 4.917802529438864e-05, "loss": 1.6621, "step": 460 }, { "epoch": 0.84, "grad_norm": 4.54414701461792, "learning_rate": 4.9142106826480114e-05, "loss": 1.6088, "step": 470 }, { "epoch": 0.85, "grad_norm": 9.983458518981934, "learning_rate": 4.910543395949067e-05, "loss": 1.6152, "step": 480 }, { "epoch": 0.87, "grad_norm": 6.45111608505249, "learning_rate": 4.9068007839383946e-05, "loss": 1.6361, "step": 490 }, { "epoch": 0.89, "grad_norm": 108.82310485839844, "learning_rate": 4.9029829635661475e-05, "loss": 1.7045, "step": 500 }, { "epoch": 0.89, "eval_loss": 1.6494970321655273, "eval_runtime": 124.6904, "eval_samples_per_second": 8.02, "eval_steps_per_second": 2.005, "step": 500 }, { "epoch": 0.91, "grad_norm": 5.705786228179932, "learning_rate": 4.899090054132609e-05, "loss": 1.738, "step": 510 }, { "epoch": 0.92, "grad_norm": 4.800131320953369, "learning_rate": 4.895122177284465e-05, "loss": 1.6218, "step": 520 }, { "epoch": 0.94, "grad_norm": 10.11057186126709, "learning_rate": 4.891079457011005e-05, "loss": 1.5169, "step": 530 }, { "epoch": 0.96, "grad_norm": 9.329095840454102, "learning_rate": 4.8869620196402436e-05, "loss": 1.7979, "step": 540 }, { "epoch": 0.98, "grad_norm": 3.9115641117095947, "learning_rate": 4.882769993834978e-05, "loss": 1.7073, "step": 550 }, { "epoch": 1.0, "grad_norm": 4.80266809463501, "learning_rate": 4.878503510588765e-05, "loss": 1.6541, "step": 560 }, { "epoch": 1.01, "grad_norm": 9.07653522491455, "learning_rate": 4.874162703221823e-05, "loss": 1.6888, "step": 570 }, { "epoch": 1.03, "grad_norm": 4.492751598358154, "learning_rate": 4.8697477073768766e-05, "loss": 1.6448, "step": 580 }, { "epoch": 1.05, "grad_norm": 13.852599143981934, "learning_rate": 4.8652586610149095e-05, "loss": 1.6236, "step": 590 }, { "epoch": 1.07, "grad_norm": 5.424524307250977, "learning_rate": 4.8606957044108556e-05, "loss": 1.4969, "step": 600 }, { "epoch": 1.07, "eval_loss": 1.6121476888656616, "eval_runtime": 124.7413, "eval_samples_per_second": 8.017, "eval_steps_per_second": 2.004, "step": 600 }, { "epoch": 1.08, "grad_norm": 3.611617088317871, "learning_rate": 4.856058980149216e-05, "loss": 1.4571, "step": 610 }, { "epoch": 1.1, "grad_norm": 4.210519313812256, "learning_rate": 4.851348633119606e-05, "loss": 1.63, "step": 620 }, { "epoch": 1.12, "grad_norm": 95.43629455566406, "learning_rate": 4.84656481051222e-05, "loss": 1.6034, "step": 630 }, { "epoch": 1.14, "grad_norm": 4.3693528175354, "learning_rate": 4.8417076618132426e-05, "loss": 1.5791, "step": 640 }, { "epoch": 1.16, "grad_norm": 3.691178321838379, "learning_rate": 4.836777338800168e-05, "loss": 1.5327, "step": 650 }, { "epoch": 1.17, "grad_norm": 3.547637939453125, "learning_rate": 4.8317739955370636e-05, "loss": 1.4278, "step": 660 }, { "epoch": 1.19, "grad_norm": 3.426717519760132, "learning_rate": 4.8266977883697515e-05, "loss": 1.5317, "step": 670 }, { "epoch": 1.21, "grad_norm": 3.004473924636841, "learning_rate": 4.821548875920927e-05, "loss": 1.6848, "step": 680 }, { "epoch": 1.23, "grad_norm": 3.686044931411743, "learning_rate": 4.816327419085196e-05, "loss": 1.6079, "step": 690 }, { "epoch": 1.24, "grad_norm": 4.130298137664795, "learning_rate": 4.811033581024056e-05, "loss": 1.5998, "step": 700 }, { "epoch": 1.24, "eval_loss": 1.5970302820205688, "eval_runtime": 124.9388, "eval_samples_per_second": 8.004, "eval_steps_per_second": 2.001, "step": 700 }, { "epoch": 1.26, "grad_norm": 6.1143059730529785, "learning_rate": 4.805667527160788e-05, "loss": 1.554, "step": 710 }, { "epoch": 1.28, "grad_norm": 31.27813148498535, "learning_rate": 4.800229425175294e-05, "loss": 1.5824, "step": 720 }, { "epoch": 1.3, "grad_norm": 9.035768508911133, "learning_rate": 4.7947194449988555e-05, "loss": 1.547, "step": 730 }, { "epoch": 1.32, "grad_norm": 39.38993835449219, "learning_rate": 4.7891377588088223e-05, "loss": 1.5795, "step": 740 }, { "epoch": 1.33, "grad_norm": 7.738800048828125, "learning_rate": 4.7834845410232356e-05, "loss": 1.5761, "step": 750 }, { "epoch": 1.35, "grad_norm": 3.3933961391448975, "learning_rate": 4.777759968295369e-05, "loss": 1.6293, "step": 760 }, { "epoch": 1.37, "grad_norm": 4.511744022369385, "learning_rate": 4.771964219508222e-05, "loss": 1.4761, "step": 770 }, { "epoch": 1.39, "grad_norm": 3.566397190093994, "learning_rate": 4.766097475768919e-05, "loss": 1.5707, "step": 780 }, { "epoch": 1.4, "grad_norm": 9.365654945373535, "learning_rate": 4.7601599204030544e-05, "loss": 1.3932, "step": 790 }, { "epoch": 1.42, "grad_norm": 3.3254847526550293, "learning_rate": 4.754151738948962e-05, "loss": 1.6041, "step": 800 }, { "epoch": 1.42, "eval_loss": 1.5639870166778564, "eval_runtime": 124.923, "eval_samples_per_second": 8.005, "eval_steps_per_second": 2.001, "step": 800 }, { "epoch": 1.44, "grad_norm": 3.520264148712158, "learning_rate": 4.7480731191519224e-05, "loss": 1.4991, "step": 810 }, { "epoch": 1.46, "grad_norm": 5.3987531661987305, "learning_rate": 4.741924250958289e-05, "loss": 1.6856, "step": 820 }, { "epoch": 1.48, "grad_norm": 12.352794647216797, "learning_rate": 4.7357053265095575e-05, "loss": 1.4509, "step": 830 }, { "epoch": 1.49, "grad_norm": 9.825531005859375, "learning_rate": 4.729416540136361e-05, "loss": 1.6168, "step": 840 }, { "epoch": 1.51, "grad_norm": 10.881526947021484, "learning_rate": 4.723058088352395e-05, "loss": 1.5783, "step": 850 }, { "epoch": 1.53, "grad_norm": 6.232407093048096, "learning_rate": 4.7166301698482815e-05, "loss": 1.4556, "step": 860 }, { "epoch": 1.55, "grad_norm": 3.3216302394866943, "learning_rate": 4.710132985485355e-05, "loss": 1.593, "step": 870 }, { "epoch": 1.56, "grad_norm": 5.219264984130859, "learning_rate": 4.703566738289389e-05, "loss": 1.5131, "step": 880 }, { "epoch": 1.58, "grad_norm": 7.875769138336182, "learning_rate": 4.696931633444251e-05, "loss": 1.5667, "step": 890 }, { "epoch": 1.6, "grad_norm": 5.77959680557251, "learning_rate": 4.69022787828549e-05, "loss": 1.5211, "step": 900 }, { "epoch": 1.6, "eval_loss": 1.5731443166732788, "eval_runtime": 124.8025, "eval_samples_per_second": 8.013, "eval_steps_per_second": 2.003, "step": 900 }, { "epoch": 1.62, "grad_norm": 4.806954383850098, "learning_rate": 4.683455682293863e-05, "loss": 1.6824, "step": 910 }, { "epoch": 1.64, "grad_norm": 5.980200290679932, "learning_rate": 4.676615257088776e-05, "loss": 1.5989, "step": 920 }, { "epoch": 1.65, "grad_norm": 4.3645429611206055, "learning_rate": 4.6697068164216896e-05, "loss": 1.6469, "step": 930 }, { "epoch": 1.67, "grad_norm": 3.2400012016296387, "learning_rate": 4.662730576169423e-05, "loss": 1.568, "step": 940 }, { "epoch": 1.69, "grad_norm": 4.331827640533447, "learning_rate": 4.6556867543274184e-05, "loss": 1.5236, "step": 950 }, { "epoch": 1.71, "grad_norm": 3.3798201084136963, "learning_rate": 4.6485755710029256e-05, "loss": 1.5046, "step": 960 }, { "epoch": 1.72, "grad_norm": 5.440864086151123, "learning_rate": 4.6413972484081216e-05, "loss": 1.5816, "step": 970 }, { "epoch": 1.74, "grad_norm": 5.852995872497559, "learning_rate": 4.6341520108531746e-05, "loss": 1.4193, "step": 980 }, { "epoch": 1.76, "grad_norm": 4.2782206535339355, "learning_rate": 4.626840084739224e-05, "loss": 1.5457, "step": 990 }, { "epoch": 1.78, "grad_norm": 8.631403923034668, "learning_rate": 4.619461698551315e-05, "loss": 1.652, "step": 1000 }, { "epoch": 1.78, "eval_loss": 1.5386379957199097, "eval_runtime": 124.8384, "eval_samples_per_second": 8.01, "eval_steps_per_second": 2.003, "step": 1000 }, { "epoch": 1.8, "grad_norm": 4.581122875213623, "learning_rate": 4.612017082851253e-05, "loss": 1.5746, "step": 1010 }, { "epoch": 1.81, "grad_norm": 3.0373165607452393, "learning_rate": 4.604506470270403e-05, "loss": 1.6038, "step": 1020 }, { "epoch": 1.83, "grad_norm": 3.5066914558410645, "learning_rate": 4.5969300955024167e-05, "loss": 1.5725, "step": 1030 }, { "epoch": 1.85, "grad_norm": 4.402235507965088, "learning_rate": 4.589288195295901e-05, "loss": 1.5469, "step": 1040 }, { "epoch": 1.87, "grad_norm": 4.844370365142822, "learning_rate": 4.58158100844702e-05, "loss": 1.5424, "step": 1050 }, { "epoch": 1.88, "grad_norm": 4.146657943725586, "learning_rate": 4.573808775792033e-05, "loss": 1.4878, "step": 1060 }, { "epoch": 1.9, "grad_norm": 3.210528612136841, "learning_rate": 4.5659717401997655e-05, "loss": 1.6077, "step": 1070 }, { "epoch": 1.92, "grad_norm": 5.2232818603515625, "learning_rate": 4.5580701465640254e-05, "loss": 1.4824, "step": 1080 }, { "epoch": 1.94, "grad_norm": 2.8741068840026855, "learning_rate": 4.550104241795946e-05, "loss": 1.6172, "step": 1090 }, { "epoch": 1.96, "grad_norm": 8.092519760131836, "learning_rate": 4.5420742748162734e-05, "loss": 1.3659, "step": 1100 }, { "epoch": 1.96, "eval_loss": 1.5198711156845093, "eval_runtime": 124.8546, "eval_samples_per_second": 8.009, "eval_steps_per_second": 2.002, "step": 1100 }, { "epoch": 1.97, "grad_norm": 5.068336009979248, "learning_rate": 4.5339804965475875e-05, "loss": 1.4661, "step": 1110 }, { "epoch": 1.99, "grad_norm": 13.167552947998047, "learning_rate": 4.525823159906459e-05, "loss": 1.411, "step": 1120 }, { "epoch": 2.01, "grad_norm": 4.712369918823242, "learning_rate": 4.5176025197955494e-05, "loss": 1.3309, "step": 1130 }, { "epoch": 2.03, "grad_norm": 7.261610507965088, "learning_rate": 4.509318833095642e-05, "loss": 1.3892, "step": 1140 }, { "epoch": 2.04, "grad_norm": 3.8006956577301025, "learning_rate": 4.500972358657618e-05, "loss": 1.3927, "step": 1150 }, { "epoch": 2.06, "grad_norm": 3.6301958560943604, "learning_rate": 4.492563357294369e-05, "loss": 1.4629, "step": 1160 }, { "epoch": 2.08, "grad_norm": 4.353027820587158, "learning_rate": 4.4840920917726426e-05, "loss": 1.352, "step": 1170 }, { "epoch": 2.1, "grad_norm": 3.375173807144165, "learning_rate": 4.475558826804833e-05, "loss": 1.4096, "step": 1180 }, { "epoch": 2.12, "grad_norm": 6.289668560028076, "learning_rate": 4.466963829040712e-05, "loss": 1.4834, "step": 1190 }, { "epoch": 2.13, "grad_norm": 4.517002582550049, "learning_rate": 4.458307367059092e-05, "loss": 1.4746, "step": 1200 }, { "epoch": 2.13, "eval_loss": 1.5145190954208374, "eval_runtime": 124.8898, "eval_samples_per_second": 8.007, "eval_steps_per_second": 2.002, "step": 1200 }, { "epoch": 2.15, "grad_norm": 3.195769786834717, "learning_rate": 4.449589711359438e-05, "loss": 1.4149, "step": 1210 }, { "epoch": 2.17, "grad_norm": 3.751405715942383, "learning_rate": 4.440811134353412e-05, "loss": 1.5501, "step": 1220 }, { "epoch": 2.19, "grad_norm": 4.148709774017334, "learning_rate": 4.431971910356363e-05, "loss": 1.5253, "step": 1230 }, { "epoch": 2.2, "grad_norm": 20.003253936767578, "learning_rate": 4.42307231557875e-05, "loss": 1.6413, "step": 1240 }, { "epoch": 2.22, "grad_norm": 4.721023082733154, "learning_rate": 4.414112628117517e-05, "loss": 1.5608, "step": 1250 }, { "epoch": 2.24, "grad_norm": 4.672358989715576, "learning_rate": 4.4050931279474015e-05, "loss": 1.3646, "step": 1260 }, { "epoch": 2.26, "grad_norm": 4.073034286499023, "learning_rate": 4.396014096912182e-05, "loss": 1.3499, "step": 1270 }, { "epoch": 2.28, "grad_norm": 3.2312991619110107, "learning_rate": 4.386875818715874e-05, "loss": 1.4648, "step": 1280 }, { "epoch": 2.29, "grad_norm": 18.92267417907715, "learning_rate": 4.3776785789138675e-05, "loss": 1.4919, "step": 1290 }, { "epoch": 2.31, "grad_norm": 5.677367687225342, "learning_rate": 4.368422664903997e-05, "loss": 1.2891, "step": 1300 }, { "epoch": 2.31, "eval_loss": 1.504623532295227, "eval_runtime": 124.8541, "eval_samples_per_second": 8.009, "eval_steps_per_second": 2.002, "step": 1300 }, { "epoch": 2.33, "grad_norm": 5.031940460205078, "learning_rate": 4.359108365917565e-05, "loss": 1.4939, "step": 1310 }, { "epoch": 2.35, "grad_norm": 7.701929092407227, "learning_rate": 4.349735973010305e-05, "loss": 1.28, "step": 1320 }, { "epoch": 2.36, "grad_norm": 5.7498040199279785, "learning_rate": 4.3403057790532855e-05, "loss": 1.4584, "step": 1330 }, { "epoch": 2.38, "grad_norm": 8.7277193069458, "learning_rate": 4.330818078723755e-05, "loss": 1.5871, "step": 1340 }, { "epoch": 2.4, "grad_norm": 13.915125846862793, "learning_rate": 4.32127316849594e-05, "loss": 1.3794, "step": 1350 }, { "epoch": 2.42, "grad_norm": 2.949733018875122, "learning_rate": 4.311671346631774e-05, "loss": 1.3543, "step": 1360 }, { "epoch": 2.44, "grad_norm": 5.377658843994141, "learning_rate": 4.302012913171584e-05, "loss": 1.3695, "step": 1370 }, { "epoch": 2.45, "grad_norm": 16.94107437133789, "learning_rate": 4.292298169924709e-05, "loss": 1.5168, "step": 1380 }, { "epoch": 2.47, "grad_norm": 4.190367221832275, "learning_rate": 4.282527420460072e-05, "loss": 1.4058, "step": 1390 }, { "epoch": 2.49, "grad_norm": 9.269573211669922, "learning_rate": 4.272700970096696e-05, "loss": 1.5794, "step": 1400 }, { "epoch": 2.49, "eval_loss": 1.498180627822876, "eval_runtime": 124.7222, "eval_samples_per_second": 8.018, "eval_steps_per_second": 2.004, "step": 1400 } ], "logging_steps": 10, "max_steps": 5620, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "total_flos": 1.0866907152427254e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }