{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 368,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 2062.9417756205603,
      "learning_rate": 2.702702702702703e-10,
      "logits/chosen": -1.3332719802856445,
      "logits/rejected": -1.246394395828247,
      "logps/chosen": -286.9539794921875,
      "logps/rejected": -263.3782958984375,
      "loss": 0.7136,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.05,
      "grad_norm": 2488.3980990852974,
      "learning_rate": 2.702702702702703e-09,
      "logits/chosen": -1.6142714023590088,
      "logits/rejected": -1.3925563097000122,
      "logps/chosen": -342.4814758300781,
      "logps/rejected": -294.5446472167969,
      "loss": 0.8226,
      "rewards/accuracies": 0.4618055522441864,
      "rewards/chosen": 0.079922616481781,
      "rewards/margins": 0.09200635552406311,
      "rewards/rejected": -0.012083739042282104,
      "step": 10
    },
    {
      "epoch": 0.11,
      "grad_norm": 2085.30491295085,
      "learning_rate": 5.405405405405406e-09,
      "logits/chosen": -1.4863827228546143,
      "logits/rejected": -1.3085709810256958,
      "logps/chosen": -314.74273681640625,
      "logps/rejected": -279.32977294921875,
      "loss": 0.8217,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.03496693819761276,
      "rewards/margins": 0.07092654705047607,
      "rewards/rejected": -0.03595960885286331,
      "step": 20
    },
    {
      "epoch": 0.16,
      "grad_norm": 2613.9787597915297,
      "learning_rate": 8.108108108108109e-09,
      "logits/chosen": -1.5464979410171509,
      "logits/rejected": -1.3788726329803467,
      "logps/chosen": -324.9065246582031,
      "logps/rejected": -286.29925537109375,
      "loss": 0.8318,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": -0.0007322698947973549,
      "rewards/margins": 0.02973010204732418,
      "rewards/rejected": -0.030462373048067093,
      "step": 30
    },
    {
      "epoch": 0.22,
      "grad_norm": 2309.6989479898994,
      "learning_rate": 9.997973265157192e-09,
      "logits/chosen": -1.5338213443756104,
      "logits/rejected": -1.356065034866333,
      "logps/chosen": -325.39349365234375,
      "logps/rejected": -285.630859375,
      "loss": 0.8544,
      "rewards/accuracies": 0.5093749761581421,
      "rewards/chosen": -0.00019043684005737305,
      "rewards/margins": -0.028223956003785133,
      "rewards/rejected": 0.02803351916372776,
      "step": 40
    },
    {
      "epoch": 0.27,
      "grad_norm": 2372.8781916000794,
      "learning_rate": 9.961988113473708e-09,
      "logits/chosen": -1.540814757347107,
      "logits/rejected": -1.3939155340194702,
      "logps/chosen": -337.01385498046875,
      "logps/rejected": -297.3047790527344,
      "loss": 0.7925,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.010568022727966309,
      "rewards/margins": 0.0009421706199645996,
      "rewards/rejected": 0.009625854901969433,
      "step": 50
    },
    {
      "epoch": 0.33,
      "grad_norm": 1906.9193219897543,
      "learning_rate": 9.881337335184878e-09,
      "logits/chosen": -1.5821880102157593,
      "logits/rejected": -1.433316707611084,
      "logps/chosen": -319.8349609375,
      "logps/rejected": -285.03131103515625,
      "loss": 0.7444,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.011926290579140186,
      "rewards/margins": 0.23517760634422302,
      "rewards/rejected": -0.22325129806995392,
      "step": 60
    },
    {
      "epoch": 0.38,
      "grad_norm": 2229.621479388874,
      "learning_rate": 9.756746912994832e-09,
      "logits/chosen": -1.5089519023895264,
      "logits/rejected": -1.3478004932403564,
      "logps/chosen": -312.11767578125,
      "logps/rejected": -275.03704833984375,
      "loss": 0.7381,
      "rewards/accuracies": 0.5531250238418579,
      "rewards/chosen": -0.015234187245368958,
      "rewards/margins": 0.07565010339021683,
      "rewards/rejected": -0.09088429063558578,
      "step": 70
    },
    {
      "epoch": 0.43,
      "grad_norm": 1970.0426820414286,
      "learning_rate": 9.589338354885628e-09,
      "logits/chosen": -1.591552734375,
      "logits/rejected": -1.4374128580093384,
      "logps/chosen": -323.3088684082031,
      "logps/rejected": -288.12445068359375,
      "loss": 0.7257,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.1117367148399353,
      "rewards/margins": 0.34563174843788147,
      "rewards/rejected": -0.23389501869678497,
      "step": 80
    },
    {
      "epoch": 0.49,
      "grad_norm": 1647.476042777907,
      "learning_rate": 9.380618598797472e-09,
      "logits/chosen": -1.6083869934082031,
      "logits/rejected": -1.4117141962051392,
      "logps/chosen": -319.9634094238281,
      "logps/rejected": -281.79248046875,
      "loss": 0.6768,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.1753208488225937,
      "rewards/margins": 0.44467267394065857,
      "rewards/rejected": -0.2693518102169037,
      "step": 90
    },
    {
      "epoch": 0.54,
      "grad_norm": 1779.591190181612,
      "learning_rate": 9.132466447838596e-09,
      "logits/chosen": -1.5439790487289429,
      "logits/rejected": -1.368858814239502,
      "logps/chosen": -321.8800964355469,
      "logps/rejected": -282.66168212890625,
      "loss": 0.6482,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": 0.34998807311058044,
      "rewards/margins": 0.6073418855667114,
      "rewards/rejected": -0.25735384225845337,
      "step": 100
    },
    {
      "epoch": 0.6,
      "grad_norm": 1799.5128068859713,
      "learning_rate": 8.847115658129039e-09,
      "logits/chosen": -1.5068881511688232,
      "logits/rejected": -1.3783992528915405,
      "logps/chosen": -318.10797119140625,
      "logps/rejected": -287.1791076660156,
      "loss": 0.6577,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.35399100184440613,
      "rewards/margins": 0.5296486616134644,
      "rewards/rejected": -0.17565762996673584,
      "step": 110
    },
    {
      "epoch": 0.65,
      "grad_norm": 1676.764876114058,
      "learning_rate": 8.527134831514116e-09,
      "logits/chosen": -1.5781362056732178,
      "logits/rejected": -1.4229751825332642,
      "logps/chosen": -331.3733825683594,
      "logps/rejected": -297.85699462890625,
      "loss": 0.6575,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": 0.3793606460094452,
      "rewards/margins": 0.4118588864803314,
      "rewards/rejected": -0.03249818831682205,
      "step": 120
    },
    {
      "epoch": 0.71,
      "grad_norm": 1566.6901996912077,
      "learning_rate": 8.175404294144481e-09,
      "logits/chosen": -1.6145737171173096,
      "logits/rejected": -1.4269483089447021,
      "logps/chosen": -317.0880432128906,
      "logps/rejected": -271.5414123535156,
      "loss": 0.6044,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": 0.6310849189758301,
      "rewards/margins": 0.7299145460128784,
      "rewards/rejected": -0.09882961958646774,
      "step": 130
    },
    {
      "epoch": 0.76,
      "grad_norm": 1706.595775593044,
      "learning_rate": 7.79509016905158e-09,
      "logits/chosen": -1.5648548603057861,
      "logits/rejected": -1.4158308506011963,
      "logps/chosen": -331.06622314453125,
      "logps/rejected": -294.2123718261719,
      "loss": 0.6171,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.7887445092201233,
      "rewards/margins": 0.765161395072937,
      "rewards/rejected": 0.023583168163895607,
      "step": 140
    },
    {
      "epoch": 0.82,
      "grad_norm": 1648.2049279025357,
      "learning_rate": 7.389615876105773e-09,
      "logits/chosen": -1.5560743808746338,
      "logits/rejected": -1.4283266067504883,
      "logps/chosen": -314.5069274902344,
      "logps/rejected": -291.7706298828125,
      "loss": 0.6127,
      "rewards/accuracies": 0.6656249761581421,
      "rewards/chosen": 0.8379846811294556,
      "rewards/margins": 0.7371869087219238,
      "rewards/rejected": 0.10079775750637054,
      "step": 150
    },
    {
      "epoch": 0.87,
      "grad_norm": 1635.8235385722824,
      "learning_rate": 6.962631315901861e-09,
      "logits/chosen": -1.5186518430709839,
      "logits/rejected": -1.4028724431991577,
      "logps/chosen": -317.958251953125,
      "logps/rejected": -291.0096435546875,
      "loss": 0.6088,
      "rewards/accuracies": 0.653124988079071,
      "rewards/chosen": 0.8378221392631531,
      "rewards/margins": 0.6740074753761292,
      "rewards/rejected": 0.16381461918354034,
      "step": 160
    },
    {
      "epoch": 0.92,
      "grad_norm": 1575.6130834814026,
      "learning_rate": 6.517980014965139e-09,
      "logits/chosen": -1.6025912761688232,
      "logits/rejected": -1.4152277708053589,
      "logps/chosen": -331.40386962890625,
      "logps/rejected": -289.4659729003906,
      "loss": 0.5997,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.8780991435050964,
      "rewards/margins": 0.8349622488021851,
      "rewards/rejected": 0.04313689470291138,
      "step": 170
    },
    {
      "epoch": 0.98,
      "grad_norm": 1546.3751249922345,
      "learning_rate": 6.059664528022266e-09,
      "logits/chosen": -1.5942988395690918,
      "logits/rejected": -1.44364333152771,
      "logps/chosen": -315.07196044921875,
      "logps/rejected": -276.7376708984375,
      "loss": 0.5773,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.8913241624832153,
      "rewards/margins": 0.9472495317459106,
      "rewards/rejected": -0.05592530965805054,
      "step": 180
    },
    {
      "epoch": 1.03,
      "grad_norm": 1681.3148479750444,
      "learning_rate": 5.591810408770492e-09,
      "logits/chosen": -1.5504480600357056,
      "logits/rejected": -1.3759148120880127,
      "logps/chosen": -315.5844421386719,
      "logps/rejected": -278.6695861816406,
      "loss": 0.5632,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": 0.8848656415939331,
      "rewards/margins": 0.8844806551933289,
      "rewards/rejected": 0.00038505197153426707,
      "step": 190
    },
    {
      "epoch": 1.09,
      "grad_norm": 1651.7882136807318,
      "learning_rate": 5.118629073464423e-09,
      "logits/chosen": -1.571003794670105,
      "logits/rejected": -1.3608561754226685,
      "logps/chosen": -325.93023681640625,
      "logps/rejected": -282.7080993652344,
      "loss": 0.5605,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 1.0313498973846436,
      "rewards/margins": 0.9450349807739258,
      "rewards/rejected": 0.08631500601768494,
      "step": 200
    },
    {
      "epoch": 1.14,
      "grad_norm": 1538.4386313699126,
      "learning_rate": 4.644379891605983e-09,
      "logits/chosen": -1.608812689781189,
      "logits/rejected": -1.4315342903137207,
      "logps/chosen": -324.66522216796875,
      "logps/rejected": -291.33428955078125,
      "loss": 0.5478,
      "rewards/accuracies": 0.7281249761581421,
      "rewards/chosen": 1.0752595663070679,
      "rewards/margins": 1.0428497791290283,
      "rewards/rejected": 0.03240995481610298,
      "step": 210
    },
    {
      "epoch": 1.2,
      "grad_norm": 1737.3887570467818,
      "learning_rate": 4.173331844980362e-09,
      "logits/chosen": -1.5384166240692139,
      "logits/rejected": -1.4137290716171265,
      "logps/chosen": -323.9536437988281,
      "logps/rejected": -293.42535400390625,
      "loss": 0.563,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": 0.9658479690551758,
      "rewards/margins": 0.9138795137405396,
      "rewards/rejected": 0.051968496292829514,
      "step": 220
    },
    {
      "epoch": 1.25,
      "grad_norm": 1605.3661746462226,
      "learning_rate": 3.7097251001664824e-09,
      "logits/chosen": -1.537548542022705,
      "logits/rejected": -1.3787362575531006,
      "logps/chosen": -323.85125732421875,
      "logps/rejected": -286.95379638671875,
      "loss": 0.526,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 1.146087408065796,
      "rewards/margins": 1.0939618349075317,
      "rewards/rejected": 0.0521254763007164,
      "step": 230
    },
    {
      "epoch": 1.3,
      "grad_norm": 1689.839854162397,
      "learning_rate": 3.2577328404292057e-09,
      "logits/chosen": -1.5391089916229248,
      "logits/rejected": -1.4084638357162476,
      "logps/chosen": -312.51373291015625,
      "logps/rejected": -285.9711608886719,
      "loss": 0.5418,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": 1.0901774168014526,
      "rewards/margins": 1.016390085220337,
      "rewards/rejected": 0.07378745824098587,
      "step": 240
    },
    {
      "epoch": 1.36,
      "grad_norm": 1710.94558540331,
      "learning_rate": 2.821423700565763e-09,
      "logits/chosen": -1.5968081951141357,
      "logits/rejected": -1.4188272953033447,
      "logps/chosen": -350.68487548828125,
      "logps/rejected": -306.6036071777344,
      "loss": 0.532,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 1.2678377628326416,
      "rewards/margins": 1.2405023574829102,
      "rewards/rejected": 0.027335500344634056,
      "step": 250
    },
    {
      "epoch": 1.41,
      "grad_norm": 1638.2367115980887,
      "learning_rate": 2.4047251428513483e-09,
      "logits/chosen": -1.6129051446914673,
      "logits/rejected": -1.4581451416015625,
      "logps/chosen": -325.2450256347656,
      "logps/rejected": -291.1476745605469,
      "loss": 0.5289,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": 1.2301806211471558,
      "rewards/margins": 1.2308820486068726,
      "rewards/rejected": -0.0007013082504272461,
      "step": 260
    },
    {
      "epoch": 1.47,
      "grad_norm": 1199.4883951774482,
      "learning_rate": 2.011388103757442e-09,
      "logits/chosen": -1.5265954732894897,
      "logits/rejected": -1.3828239440917969,
      "logps/chosen": -316.2944641113281,
      "logps/rejected": -285.7884826660156,
      "loss": 0.5191,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 1.3710923194885254,
      "rewards/margins": 1.2594387531280518,
      "rewards/rejected": 0.11165344715118408,
      "step": 270
    },
    {
      "epoch": 1.52,
      "grad_norm": 1472.2115597857592,
      "learning_rate": 1.644953229677474e-09,
      "logits/chosen": -1.600651502609253,
      "logits/rejected": -1.4179413318634033,
      "logps/chosen": -326.00335693359375,
      "logps/rejected": -284.74188232421875,
      "loss": 0.5459,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 1.3610546588897705,
      "rewards/margins": 1.2091944217681885,
      "rewards/rejected": 0.1518600881099701,
      "step": 280
    },
    {
      "epoch": 1.58,
      "grad_norm": 1566.9737970600454,
      "learning_rate": 1.308719005590957e-09,
      "logits/chosen": -1.5032551288604736,
      "logits/rejected": -1.3876453638076782,
      "logps/chosen": -318.40948486328125,
      "logps/rejected": -282.49554443359375,
      "loss": 0.5407,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 1.2658413648605347,
      "rewards/margins": 1.187675952911377,
      "rewards/rejected": 0.07816555351018906,
      "step": 290
    },
    {
      "epoch": 1.63,
      "grad_norm": 1348.7257224769698,
      "learning_rate": 1.005712063557776e-09,
      "logits/chosen": -1.6333671808242798,
      "logits/rejected": -1.455556869506836,
      "logps/chosen": -324.13885498046875,
      "logps/rejected": -290.60186767578125,
      "loss": 0.5346,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": 1.1175706386566162,
      "rewards/margins": 1.0337438583374023,
      "rewards/rejected": 0.08382664620876312,
      "step": 300
    },
    {
      "epoch": 1.68,
      "grad_norm": 1356.5441208888985,
      "learning_rate": 7.386599383124321e-10,
      "logits/chosen": -1.565224051475525,
      "logits/rejected": -1.3825923204421997,
      "logps/chosen": -321.80316162109375,
      "logps/rejected": -285.7908630371094,
      "loss": 0.5304,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 1.2159234285354614,
      "rewards/margins": 1.1465200185775757,
      "rewards/rejected": 0.06940338760614395,
      "step": 310
    },
    {
      "epoch": 1.74,
      "grad_norm": 1445.3559110776998,
      "learning_rate": 5.099665152003929e-10,
      "logits/chosen": -1.5921494960784912,
      "logits/rejected": -1.3807857036590576,
      "logps/chosen": -333.7308654785156,
      "logps/rejected": -289.9362487792969,
      "loss": 0.5241,
      "rewards/accuracies": 0.7718750238418579,
      "rewards/chosen": 1.3256893157958984,
      "rewards/margins": 1.292041540145874,
      "rewards/rejected": 0.03364778310060501,
      "step": 320
    },
    {
      "epoch": 1.79,
      "grad_norm": 1681.5042999261696,
      "learning_rate": 3.216903914633745e-10,
      "logits/chosen": -1.5627129077911377,
      "logits/rejected": -1.4408833980560303,
      "logps/chosen": -325.2505187988281,
      "logps/rejected": -296.106201171875,
      "loss": 0.5429,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 1.165374517440796,
      "rewards/margins": 1.0651426315307617,
      "rewards/rejected": 0.1002318263053894,
      "step": 330
    },
    {
      "epoch": 1.85,
      "grad_norm": 1536.75287567762,
      "learning_rate": 1.7552634565570324e-10,
      "logits/chosen": -1.5574743747711182,
      "logits/rejected": -1.3901411294937134,
      "logps/chosen": -329.89141845703125,
      "logps/rejected": -292.8751525878906,
      "loss": 0.5342,
      "rewards/accuracies": 0.753125011920929,
      "rewards/chosen": 1.4129165410995483,
      "rewards/margins": 1.3112914562225342,
      "rewards/rejected": 0.10162514448165894,
      "step": 340
    },
    {
      "epoch": 1.9,
      "grad_norm": 1492.8399510840338,
      "learning_rate": 7.279008199590543e-11,
      "logits/chosen": -1.5503973960876465,
      "logits/rejected": -1.3889100551605225,
      "logps/chosen": -326.42120361328125,
      "logps/rejected": -291.9585266113281,
      "loss": 0.5261,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 1.3398044109344482,
      "rewards/margins": 1.2421011924743652,
      "rewards/rejected": 0.09770330041646957,
      "step": 350
    },
    {
      "epoch": 1.96,
      "grad_norm": 1452.281513333118,
      "learning_rate": 1.4406386978128017e-11,
      "logits/chosen": -1.6207876205444336,
      "logits/rejected": -1.424393653869629,
      "logps/chosen": -331.06390380859375,
      "logps/rejected": -291.6929626464844,
      "loss": 0.5043,
      "rewards/accuracies": 0.7906249761581421,
      "rewards/chosen": 1.518845558166504,
      "rewards/margins": 1.381410837173462,
      "rewards/rejected": 0.13743488490581512,
      "step": 360
    },
    {
      "epoch": 2.0,
      "step": 368,
      "total_flos": 0.0,
      "train_loss": 0.6161670185949492,
      "train_runtime": 9955.6802,
      "train_samples_per_second": 9.461,
      "train_steps_per_second": 0.037
    }
  ],
  "logging_steps": 10,
  "max_steps": 368,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}