{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 478, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -2.7386245727539062, "logits/rejected": -2.7273669242858887, "logps/chosen": -262.8376159667969, "logps/rejected": -255.88758850097656, "loss": 0.1038, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.7419047355651855, "logits/rejected": -2.7360031604766846, "logps/chosen": -305.9395446777344, "logps/rejected": -270.57177734375, "loss": 0.1063, "rewards/accuracies": 0.5138888955116272, "rewards/chosen": 0.0002741153002716601, "rewards/margins": 0.0006307306466624141, "rewards/rejected": -0.00035661537549458444, "step": 10 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.7987372875213623, "logits/rejected": -2.779291868209839, "logps/chosen": -296.0432434082031, "logps/rejected": -258.17041015625, "loss": 0.1055, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 2.8045265935361385e-05, "rewards/margins": 0.0010506389662623405, "rewards/rejected": -0.001022593816742301, "step": 20 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.83036470413208, "logits/rejected": -2.802358627319336, "logps/chosen": -300.7704162597656, "logps/rejected": -259.5246276855469, "loss": 0.1044, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.0002994390088133514, "rewards/margins": 0.0064557394944131374, "rewards/rejected": -0.006755178328603506, "step": 30 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.7836341857910156, "logits/rejected": -2.757286310195923, "logps/chosen": -257.14385986328125, "logps/rejected": -248.82925415039062, "loss": 0.0977, "rewards/accuracies": 0.65625, "rewards/chosen": -0.001975560560822487, "rewards/margins": 0.018592100590467453, "rewards/rejected": -0.02056765928864479, "step": 40 }, { "epoch": 0.1, "learning_rate": 4.999733114418725e-07, "logits/chosen": -2.773463726043701, "logits/rejected": -2.75862979888916, "logps/chosen": -255.62783813476562, "logps/rejected": -247.96707153320312, "loss": 0.092, "rewards/accuracies": 0.65625, "rewards/chosen": -0.023576391860842705, "rewards/margins": 0.053500402718782425, "rewards/rejected": -0.07707679271697998, "step": 50 }, { "epoch": 0.13, "learning_rate": 4.990398100856366e-07, "logits/chosen": -2.7168498039245605, "logits/rejected": -2.684145450592041, "logps/chosen": -265.1424255371094, "logps/rejected": -254.54867553710938, "loss": 0.0899, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.08215885609388351, "rewards/margins": 0.07760664075613022, "rewards/rejected": -0.15976549685001373, "step": 60 }, { "epoch": 0.15, "learning_rate": 4.967775735898179e-07, "logits/chosen": -2.741403341293335, "logits/rejected": -2.7200100421905518, "logps/chosen": -289.6435852050781, "logps/rejected": -287.03662109375, "loss": 0.0779, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1611556112766266, "rewards/margins": 0.1475805938243866, "rewards/rejected": -0.3087361752986908, "step": 70 }, { "epoch": 0.17, "learning_rate": 4.931986719649298e-07, "logits/chosen": -2.7668230533599854, "logits/rejected": -2.7418100833892822, "logps/chosen": -306.257568359375, "logps/rejected": -304.8079833984375, "loss": 0.0713, "rewards/accuracies": 0.625, "rewards/chosen": -0.28854140639305115, "rewards/margins": 0.16743852198123932, "rewards/rejected": -0.45597997307777405, "step": 80 }, { "epoch": 0.19, "learning_rate": 4.883222001996351e-07, "logits/chosen": -2.7547733783721924, "logits/rejected": -2.7255868911743164, "logps/chosen": -280.0272216796875, "logps/rejected": -278.74127197265625, "loss": 0.0594, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3458347022533417, "rewards/margins": 0.24216556549072266, "rewards/rejected": -0.5880002975463867, "step": 90 }, { "epoch": 0.21, "learning_rate": 4.821741763807186e-07, "logits/chosen": -2.6905529499053955, "logits/rejected": -2.6901133060455322, "logps/chosen": -340.79461669921875, "logps/rejected": -338.3218688964844, "loss": 0.0486, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6042592525482178, "rewards/margins": 0.2673302888870239, "rewards/rejected": -0.8715896606445312, "step": 100 }, { "epoch": 0.21, "eval_logits/chosen": -2.732572317123413, "eval_logits/rejected": -2.717289686203003, "eval_logps/chosen": -324.24517822265625, "eval_logps/rejected": -360.8448791503906, "eval_loss": 0.04314277693629265, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.6720553636550903, "eval_rewards/margins": 0.3628607988357544, "eval_rewards/rejected": -1.0349161624908447, "eval_runtime": 53.2647, "eval_samples_per_second": 37.548, "eval_steps_per_second": 0.601, "step": 100 }, { "epoch": 0.23, "learning_rate": 4.747874028753375e-07, "logits/chosen": -2.720991373062134, "logits/rejected": -2.688431978225708, "logps/chosen": -372.272705078125, "logps/rejected": -368.93316650390625, "loss": 0.0429, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7657500505447388, "rewards/margins": 0.34390324354171753, "rewards/rejected": -1.109653353691101, "step": 110 }, { "epoch": 0.25, "learning_rate": 4.662012913161997e-07, "logits/chosen": -2.6478002071380615, "logits/rejected": -2.6571507453918457, "logps/chosen": -302.8161315917969, "logps/rejected": -345.29022216796875, "loss": 0.0369, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7161726951599121, "rewards/margins": 0.4097130298614502, "rewards/rejected": -1.1258857250213623, "step": 120 }, { "epoch": 0.27, "learning_rate": 4.5646165232345103e-07, "logits/chosen": -2.6481852531433105, "logits/rejected": -2.6323132514953613, "logps/chosen": -315.4286804199219, "logps/rejected": -349.3882751464844, "loss": 0.0366, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7872930765151978, "rewards/margins": 0.33242180943489075, "rewards/rejected": -1.1197148561477661, "step": 130 }, { "epoch": 0.29, "learning_rate": 4.456204510851956e-07, "logits/chosen": -2.6520018577575684, "logits/rejected": -2.6314806938171387, "logps/chosen": -349.292236328125, "logps/rejected": -358.19696044921875, "loss": 0.0328, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8543764352798462, "rewards/margins": 0.41757732629776, "rewards/rejected": -1.271953821182251, "step": 140 }, { "epoch": 0.31, "learning_rate": 4.337355301007335e-07, "logits/chosen": -2.5678088665008545, "logits/rejected": -2.561540126800537, "logps/chosen": -360.6986999511719, "logps/rejected": -373.29876708984375, "loss": 0.0346, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9629270434379578, "rewards/margins": 0.4645315110683441, "rewards/rejected": -1.4274585247039795, "step": 150 }, { "epoch": 0.33, "learning_rate": 4.2087030056579986e-07, "logits/chosen": -2.510655641555786, "logits/rejected": -2.5043094158172607, "logps/chosen": -390.89556884765625, "logps/rejected": -418.712646484375, "loss": 0.0284, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.16335129737854, "rewards/margins": 0.5290186405181885, "rewards/rejected": -1.692370057106018, "step": 160 }, { "epoch": 0.36, "learning_rate": 4.070934040463998e-07, "logits/chosen": -2.5995492935180664, "logits/rejected": -2.5737595558166504, "logps/chosen": -412.41259765625, "logps/rejected": -393.64605712890625, "loss": 0.0321, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.8894079327583313, "rewards/margins": 0.48425012826919556, "rewards/rejected": -1.3736579418182373, "step": 170 }, { "epoch": 0.38, "learning_rate": 3.9247834624635404e-07, "logits/chosen": -2.5762248039245605, "logits/rejected": -2.5727803707122803, "logps/chosen": -357.29132080078125, "logps/rejected": -409.08453369140625, "loss": 0.0305, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.9876778721809387, "rewards/margins": 0.4671412408351898, "rewards/rejected": -1.4548190832138062, "step": 180 }, { "epoch": 0.4, "learning_rate": 3.7710310482256523e-07, "logits/chosen": -2.4623260498046875, "logits/rejected": -2.4131171703338623, "logps/chosen": -391.2403564453125, "logps/rejected": -413.74554443359375, "loss": 0.0281, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1796742677688599, "rewards/margins": 0.40752944350242615, "rewards/rejected": -1.5872037410736084, "step": 190 }, { "epoch": 0.42, "learning_rate": 3.610497133404795e-07, "logits/chosen": -2.451063394546509, "logits/rejected": -2.4568967819213867, "logps/chosen": -348.5596618652344, "logps/rejected": -400.4520263671875, "loss": 0.027, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.1439893245697021, "rewards/margins": 0.4998703896999359, "rewards/rejected": -1.64385986328125, "step": 200 }, { "epoch": 0.42, "eval_logits/chosen": -2.5091135501861572, "eval_logits/rejected": -2.4957656860351562, "eval_logps/chosen": -366.6208190917969, "eval_logps/rejected": -425.7963562011719, "eval_loss": 0.02966611087322235, "eval_rewards/accuracies": 0.6953125, "eval_rewards/chosen": -1.0958118438720703, "eval_rewards/margins": 0.5886186957359314, "eval_rewards/rejected": -1.6844305992126465, "eval_runtime": 53.1867, "eval_samples_per_second": 37.603, "eval_steps_per_second": 0.602, "step": 200 }, { "epoch": 0.44, "learning_rate": 3.4440382358952115e-07, "logits/chosen": -2.4863812923431396, "logits/rejected": -2.407597780227661, "logps/chosen": -417.18634033203125, "logps/rejected": -401.4176025390625, "loss": 0.0316, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.203452467918396, "rewards/margins": 0.41743287444114685, "rewards/rejected": -1.6208854913711548, "step": 210 }, { "epoch": 0.46, "learning_rate": 3.272542485937368e-07, "logits/chosen": -2.5017166137695312, "logits/rejected": -2.444180965423584, "logps/chosen": -391.02996826171875, "logps/rejected": -405.67987060546875, "loss": 0.029, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.1130907535552979, "rewards/margins": 0.5619007349014282, "rewards/rejected": -1.6749913692474365, "step": 220 }, { "epoch": 0.48, "learning_rate": 3.096924887558854e-07, "logits/chosen": -2.5100924968719482, "logits/rejected": -2.4785008430480957, "logps/chosen": -392.33062744140625, "logps/rejected": -428.2881774902344, "loss": 0.029, "rewards/accuracies": 0.71875, "rewards/chosen": -1.0476332902908325, "rewards/margins": 0.6075866222381592, "rewards/rejected": -1.6552197933197021, "step": 230 }, { "epoch": 0.5, "learning_rate": 2.9181224366319943e-07, "logits/chosen": -2.4946725368499756, "logits/rejected": -2.490581512451172, "logps/chosen": -392.0195007324219, "logps/rejected": -388.649169921875, "loss": 0.0315, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.875754177570343, "rewards/margins": 0.5238613486289978, "rewards/rejected": -1.3996155261993408, "step": 240 }, { "epoch": 0.52, "learning_rate": 2.7370891215954565e-07, "logits/chosen": -2.431591272354126, "logits/rejected": -2.410667896270752, "logps/chosen": -353.4183654785156, "logps/rejected": -399.400146484375, "loss": 0.0286, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0008418560028076, "rewards/margins": 0.5946453809738159, "rewards/rejected": -1.595487356185913, "step": 250 }, { "epoch": 0.54, "learning_rate": 2.55479083351317e-07, "logits/chosen": -2.4323439598083496, "logits/rejected": -2.4124226570129395, "logps/chosen": -417.405029296875, "logps/rejected": -445.43707275390625, "loss": 0.0298, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.1992970705032349, "rewards/margins": 0.47717300057411194, "rewards/rejected": -1.676470160484314, "step": 260 }, { "epoch": 0.56, "learning_rate": 2.3722002126275822e-07, "logits/chosen": -2.475048542022705, "logits/rejected": -2.4506657123565674, "logps/chosen": -392.7640075683594, "logps/rejected": -430.6897888183594, "loss": 0.0253, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.3913904428482056, "rewards/margins": 0.3153776526451111, "rewards/rejected": -1.7067680358886719, "step": 270 }, { "epoch": 0.59, "learning_rate": 2.19029145890313e-07, "logits/chosen": -2.4361844062805176, "logits/rejected": -2.4216103553771973, "logps/chosen": -388.96063232421875, "logps/rejected": -430.0042419433594, "loss": 0.0261, "rewards/accuracies": 0.71875, "rewards/chosen": -1.2630523443222046, "rewards/margins": 0.5633870363235474, "rewards/rejected": -1.8264392614364624, "step": 280 }, { "epoch": 0.61, "learning_rate": 2.0100351342479216e-07, "logits/chosen": -2.500619411468506, "logits/rejected": -2.46304988861084, "logps/chosen": -409.1497497558594, "logps/rejected": -441.10198974609375, "loss": 0.0304, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0858951807022095, "rewards/margins": 0.46196287870407104, "rewards/rejected": -1.5478579998016357, "step": 290 }, { "epoch": 0.63, "learning_rate": 1.8323929841460178e-07, "logits/chosen": -2.4326109886169434, "logits/rejected": -2.4290225505828857, "logps/chosen": -392.1640930175781, "logps/rejected": -414.16351318359375, "loss": 0.0267, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1948238611221313, "rewards/margins": 0.47188109159469604, "rewards/rejected": -1.6667048931121826, "step": 300 }, { "epoch": 0.63, "eval_logits/chosen": -2.471911668777466, "eval_logits/rejected": -2.458660840988159, "eval_logps/chosen": -367.22003173828125, "eval_logps/rejected": -428.8892517089844, "eval_loss": 0.02865579165518284, "eval_rewards/accuracies": 0.734375, "eval_rewards/chosen": -1.1018041372299194, "eval_rewards/margins": 0.6135556101799011, "eval_rewards/rejected": -1.7153598070144653, "eval_runtime": 53.2357, "eval_samples_per_second": 37.569, "eval_steps_per_second": 0.601, "step": 300 }, { "epoch": 0.65, "learning_rate": 1.6583128063291573e-07, "logits/chosen": -2.4415957927703857, "logits/rejected": -2.4284932613372803, "logps/chosen": -407.1797790527344, "logps/rejected": -432.66436767578125, "loss": 0.0263, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1042468547821045, "rewards/margins": 0.6383775472640991, "rewards/rejected": -1.7426245212554932, "step": 310 }, { "epoch": 0.67, "learning_rate": 1.488723393865766e-07, "logits/chosen": -2.4184253215789795, "logits/rejected": -2.3910305500030518, "logps/chosen": -378.49432373046875, "logps/rejected": -415.8314514160156, "loss": 0.0269, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.379817247390747, "rewards/margins": 0.4037790894508362, "rewards/rejected": -1.783596396446228, "step": 320 }, { "epoch": 0.69, "learning_rate": 1.3245295796480788e-07, "logits/chosen": -2.390575885772705, "logits/rejected": -2.3807907104492188, "logps/chosen": -402.01800537109375, "logps/rejected": -452.57598876953125, "loss": 0.0257, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.1732884645462036, "rewards/margins": 0.4634523391723633, "rewards/rejected": -1.6367409229278564, "step": 330 }, { "epoch": 0.71, "learning_rate": 1.1666074087171627e-07, "logits/chosen": -2.4380133152008057, "logits/rejected": -2.4201126098632812, "logps/chosen": -409.64093017578125, "logps/rejected": -423.3724670410156, "loss": 0.0238, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.243157982826233, "rewards/margins": 0.5037888288497925, "rewards/rejected": -1.7469466924667358, "step": 340 }, { "epoch": 0.73, "learning_rate": 1.0157994641835734e-07, "logits/chosen": -2.4198803901672363, "logits/rejected": -2.369533061981201, "logps/chosen": -408.26910400390625, "logps/rejected": -487.27435302734375, "loss": 0.0235, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.5223808288574219, "rewards/margins": 0.7387748956680298, "rewards/rejected": -2.261155843734741, "step": 350 }, { "epoch": 0.75, "learning_rate": 8.729103716819111e-08, "logits/chosen": -2.3843138217926025, "logits/rejected": -2.3418803215026855, "logps/chosen": -431.44854736328125, "logps/rejected": -472.52813720703125, "loss": 0.0225, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4586023092269897, "rewards/margins": 0.6385560035705566, "rewards/rejected": -2.097158432006836, "step": 360 }, { "epoch": 0.77, "learning_rate": 7.387025063449081e-08, "logits/chosen": -2.4029147624969482, "logits/rejected": -2.3779168128967285, "logps/chosen": -383.9462890625, "logps/rejected": -458.59735107421875, "loss": 0.0216, "rewards/accuracies": 0.71875, "rewards/chosen": -1.391872763633728, "rewards/margins": 0.7170418500900269, "rewards/rejected": -2.108914375305176, "step": 370 }, { "epoch": 0.79, "learning_rate": 6.138919252022435e-08, "logits/chosen": -2.3961706161499023, "logits/rejected": -2.36126708984375, "logps/chosen": -409.074951171875, "logps/rejected": -450.57452392578125, "loss": 0.0207, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4532839059829712, "rewards/margins": 0.6719815731048584, "rewards/rejected": -2.125265598297119, "step": 380 }, { "epoch": 0.82, "learning_rate": 4.991445467064689e-08, "logits/chosen": -2.337531328201294, "logits/rejected": -2.3046772480010986, "logps/chosen": -442.0828552246094, "logps/rejected": -477.5122985839844, "loss": 0.0231, "rewards/accuracies": 0.625, "rewards/chosen": -1.542133092880249, "rewards/margins": 0.5396715402603149, "rewards/rejected": -2.0818047523498535, "step": 390 }, { "epoch": 0.84, "learning_rate": 3.9507259776993954e-08, "logits/chosen": -2.4024269580841064, "logits/rejected": -2.3905534744262695, "logps/chosen": -429.47369384765625, "logps/rejected": -499.0704650878906, "loss": 0.0208, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6260488033294678, "rewards/margins": 0.7209797501564026, "rewards/rejected": -2.3470287322998047, "step": 400 }, { "epoch": 0.84, "eval_logits/chosen": -2.407222270965576, "eval_logits/rejected": -2.3938333988189697, "eval_logps/chosen": -404.4031677246094, "eval_logps/rejected": -479.36236572265625, "eval_loss": 0.022896816954016685, "eval_rewards/accuracies": 0.71484375, "eval_rewards/chosen": -1.4736356735229492, "eval_rewards/margins": 0.746455192565918, "eval_rewards/rejected": -2.220090866088867, "eval_runtime": 53.1792, "eval_samples_per_second": 37.609, "eval_steps_per_second": 0.602, "step": 400 }, { "epoch": 0.86, "learning_rate": 3.022313472693447e-08, "logits/chosen": -2.379589796066284, "logits/rejected": -2.371739387512207, "logps/chosen": -441.45648193359375, "logps/rejected": -446.16265869140625, "loss": 0.0207, "rewards/accuracies": 0.65625, "rewards/chosen": -1.5874555110931396, "rewards/margins": 0.516998291015625, "rewards/rejected": -2.1044538021087646, "step": 410 }, { "epoch": 0.88, "learning_rate": 2.2111614344599684e-08, "logits/chosen": -2.4584357738494873, "logits/rejected": -2.4399354457855225, "logps/chosen": -441.65179443359375, "logps/rejected": -459.64208984375, "loss": 0.0232, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.469987392425537, "rewards/margins": 0.5242463946342468, "rewards/rejected": -1.9942338466644287, "step": 420 }, { "epoch": 0.9, "learning_rate": 1.521597710086439e-08, "logits/chosen": -2.4622554779052734, "logits/rejected": -2.4207379817962646, "logps/chosen": -428.8905334472656, "logps/rejected": -432.05108642578125, "loss": 0.021, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.411439299583435, "rewards/margins": 0.6273307800292969, "rewards/rejected": -2.0387701988220215, "step": 430 }, { "epoch": 0.92, "learning_rate": 9.57301420397924e-09, "logits/chosen": -2.443328857421875, "logits/rejected": -2.4111621379852295, "logps/chosen": -445.6102600097656, "logps/rejected": -446.654052734375, "loss": 0.0227, "rewards/accuracies": 0.71875, "rewards/chosen": -1.4215877056121826, "rewards/margins": 0.5511332154273987, "rewards/rejected": -1.972720742225647, "step": 440 }, { "epoch": 0.94, "learning_rate": 5.212833302556258e-09, "logits/chosen": -2.457451581954956, "logits/rejected": -2.4286131858825684, "logps/chosen": -411.8194885253906, "logps/rejected": -525.404296875, "loss": 0.0243, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.3488930463790894, "rewards/margins": 0.9843934774398804, "rewards/rejected": -2.333286762237549, "step": 450 }, { "epoch": 0.96, "learning_rate": 2.158697848236607e-09, "logits/chosen": -2.392413377761841, "logits/rejected": -2.3623125553131104, "logps/chosen": -452.897216796875, "logps/rejected": -460.3353576660156, "loss": 0.0226, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4256120920181274, "rewards/margins": 0.5736899375915527, "rewards/rejected": -1.9993021488189697, "step": 460 }, { "epoch": 0.98, "learning_rate": 4.269029751107489e-10, "logits/chosen": -2.4287571907043457, "logits/rejected": -2.387329578399658, "logps/chosen": -451.8321228027344, "logps/rejected": -473.54388427734375, "loss": 0.0243, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4879666566848755, "rewards/margins": 0.6231549978256226, "rewards/rejected": -2.111121654510498, "step": 470 }, { "epoch": 1.0, "step": 478, "total_flos": 0.0, "train_loss": 0.03937680171373998, "train_runtime": 4352.8265, "train_samples_per_second": 14.045, "train_steps_per_second": 0.11 } ], "logging_steps": 10, "max_steps": 478, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "trial_name": null, "trial_params": null }